feat(pseo): add pSEO Engine admin tab
Operational dashboard at /admin/pseo for the programmatic SEO system: content gap detection, data freshness signals, article health checks (hreflang orphans, missing build files, broken scenario refs), and live generation job monitoring with HTMX progress bars. - _serving_meta.json written by export_serving.py after atomic DB swap - content/health.py: pure async query functions for all health checks - Migration 0021: progress_current/total/error_log on tasks table - generate_articles() writes progress every 50 articles + on completion - admin/pseo_routes.py: 6 routes, standalone blueprint - 5 HTML templates + sidebar nav + fromjson Jinja filter - 45 tests (all passing); 2 bugs caught and fixed during testing Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> # Conflicts: # src/padelnomics/export_serving.py
This commit is contained in:
209
web/src/padelnomics/admin/pseo_routes.py
Normal file
209
web/src/padelnomics/admin/pseo_routes.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
pSEO Engine admin blueprint.
|
||||
|
||||
Operational visibility for the programmatic SEO system:
|
||||
/admin/pseo/ → dashboard (template stats, freshness, recent jobs)
|
||||
/admin/pseo/health → HTMX partial: health issues
|
||||
/admin/pseo/gaps/<slug> → HTMX partial: content gaps for one template
|
||||
/admin/pseo/gaps/<slug>/generate → POST: enqueue gap-fill job
|
||||
/admin/pseo/jobs → recent generation jobs
|
||||
/admin/pseo/jobs/<id>/status → HTMX polled: progress bar for one job
|
||||
|
||||
Registered as a standalone blueprint so admin/routes.py (already ~2,100 lines)
|
||||
stays focused on its own domain.
|
||||
"""
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
from quart import Blueprint, flash, redirect, render_template, url_for
|
||||
|
||||
from ..auth.routes import role_required
|
||||
from ..content import discover_templates, load_template
|
||||
from ..content.health import (
|
||||
get_all_health_issues,
|
||||
get_content_gaps,
|
||||
get_template_freshness,
|
||||
get_template_stats,
|
||||
)
|
||||
from ..core import csrf_protect, fetch_all, fetch_one
|
||||
|
||||
# Standalone blueprint for the pSEO admin pages. Templates are resolved from
# the "templates" directory that sits next to this module.
bp = Blueprint(
    "pseo",
    __name__,
    url_prefix="/admin/pseo",
    template_folder=str(Path(__file__).with_name("templates")),
)
|
||||
|
||||
|
||||
@bp.before_request
async def _inject_sidebar_data():
    """Load the unread inbox count for the admin sidebar badge.

    Runs before every request handled by this blueprint and stores the count
    on ``g.admin_unread_count``. The lookup is best-effort: if it fails for
    any reason the count falls back to 0 so the page still renders, but the
    failure is logged instead of being silently discarded.
    """
    import logging

    from quart import g

    try:
        row = await fetch_one("SELECT COUNT(*) as cnt FROM inbound_emails WHERE is_read = 0")
        g.admin_unread_count = row["cnt"] if row else 0
    except Exception:
        # Deliberately broad: a broken badge must never take down an admin
        # page. Log with traceback so the underlying problem stays visible.
        logging.getLogger(__name__).warning(
            "pSEO admin: unread inbox count lookup failed; showing 0",
            exc_info=True,
        )
        g.admin_unread_count = 0
|
||||
|
||||
|
||||
@bp.context_processor
def _admin_context():
    """Provide ``unread_count`` to every template rendered by this blueprint.

    Reads ``g.admin_unread_count`` and falls back to 0 when the attribute
    has not been set on ``g``.
    """
    from quart import g

    unread = getattr(g, "admin_unread_count", 0)
    return dict(unread_count=unread)
|
||||
|
||||
|
||||
# ── Dashboard ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@bp.route("/")
@role_required("admin")
async def pseo_dashboard():
    """Render the pSEO Engine dashboard: template stats, freshness, recent jobs.

    For every discovered template this collects its article stats and its
    freshness record, derives the summary totals shown in the header cards,
    and loads the five most recent ``generate_articles`` tasks.
    """
    templates = discover_templates()

    freshness = await get_template_freshness(templates)
    freshness_by_slug = {f["slug"]: f for f in freshness}

    template_rows = []
    for t in templates:
        stats = await get_template_stats(t["slug"])
        template_rows.append({
            "template": t,
            "stats": stats,
            # Empty dict when no freshness record exists for this slug.
            "freshness": freshness_by_slug.get(t["slug"], {}),
        })

    total_articles = sum(r["stats"]["total"] for r in template_rows)
    total_published = sum(r["stats"]["published"] for r in template_rows)
    stale_count = sum(1 for f in freshness if f["status"] == "stale")

    # Recent generation jobs — enough for the dashboard summary.
    # `payload` is selected because pseo_dashboard.html reads job.payload to
    # display the template slug next to the job id; without it the slug is
    # never rendered.
    jobs = await fetch_all(
        "SELECT id, task_name, status, payload, progress_current, progress_total,"
        " error, error_log, created_at, completed_at"
        " FROM tasks WHERE task_name = 'generate_articles'"
        " ORDER BY created_at DESC LIMIT 5",
    )

    return await render_template(
        "admin/pseo_dashboard.html",
        template_rows=template_rows,
        total_articles=total_articles,
        total_published=total_published,
        total_templates=len(templates),
        stale_count=stale_count,
        jobs=jobs,
        admin_page="pseo",
    )
|
||||
|
||||
|
||||
# ── Health checks (HTMX partial) ─────────────────────────────────────────────
|
||||
|
||||
|
||||
@bp.route("/health")
@role_required("admin")
async def pseo_health():
    """Render the health-issues partial requested by HTMX.

    Runs the health checks over every discovered template and passes the
    result to ``admin/pseo_health.html`` as ``health``.
    """
    issues = await get_all_health_issues(discover_templates())
    return await render_template("admin/pseo_health.html", health=issues)
|
||||
|
||||
|
||||
# ── Content gaps (HTMX partial + generate action) ────────────────────────────
|
||||
|
||||
|
||||
@bp.route("/gaps/<slug>")
@role_required("admin")
async def pseo_gaps_template(slug: str):
    """Render the content-gaps partial for a single template.

    Responds with a plain 404 when ``load_template`` raises
    ``AssertionError`` or ``FileNotFoundError`` for ``slug``.
    """
    try:
        template_config = load_template(slug)
    except (AssertionError, FileNotFoundError):
        return "Template not found", 404

    missing = await get_content_gaps(
        template_slug=slug,
        data_table=template_config["data_table"],
        natural_key=template_config["natural_key"],
        languages=template_config["languages"],
    )
    return await render_template(
        "admin/pseo_gaps.html",
        template=template_config,
        gaps=missing,
    )
|
||||
|
||||
|
||||
@bp.route("/gaps/<slug>/generate", methods=["POST"])
@role_required("admin")
@csrf_protect
async def pseo_generate_gaps(slug: str):
    """Enqueue a ``generate_articles`` job for this template when it has gaps.

    Always redirects back to the dashboard, flashing an error for an unknown
    template, an info message when there are no gaps, or a success message
    once the job has been queued.

    NOTE(review): the enqueued payload carries only the template slug, a
    start date and a fixed limit of 500 — no gap information — while the
    success message reports ``len(gaps)``. Confirm the worker restricts
    itself to missing articles and that the count is accurate above 500.
    """
    from ..worker import enqueue

    dashboard_endpoint = "pseo.pseo_dashboard"

    try:
        template_config = load_template(slug)
    except (AssertionError, FileNotFoundError):
        await flash("Template not found.", "error")
        return redirect(url_for(dashboard_endpoint))

    missing = await get_content_gaps(
        template_slug=slug,
        data_table=template_config["data_table"],
        natural_key=template_config["natural_key"],
        languages=template_config["languages"],
    )

    if missing:
        job_payload = {
            "template_slug": slug,
            "start_date": date.today().isoformat(),
            "articles_per_day": 500,
            "limit": 500,
        }
        await enqueue("generate_articles", job_payload)
        message = f"Queued generation for {len(missing)} missing articles in '{template_config['name']}'."
        category = "success"
    else:
        message = f"No gaps found for '{template_config['name']}' — nothing to generate."
        category = "info"

    await flash(message, category)
    return redirect(url_for(dashboard_endpoint))
|
||||
|
||||
|
||||
# ── Generation job monitoring ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
@bp.route("/jobs")
@role_required("admin")
async def pseo_jobs():
    """Render the full list of recent article generation jobs.

    Loads the 20 most recent ``generate_articles`` tasks, newest first.
    """
    # `payload` is selected because pseo_jobs.html reads job.payload to show
    # the template slug in the "Template" column; without it that column
    # always renders the "—" placeholder.
    jobs = await fetch_all(
        "SELECT id, task_name, status, payload, progress_current, progress_total,"
        " error, error_log, created_at, completed_at"
        " FROM tasks WHERE task_name = 'generate_articles'"
        " ORDER BY created_at DESC LIMIT 20",
    )
    return await render_template(
        "admin/pseo_jobs.html",
        jobs=jobs,
        admin_page="pseo",
    )
|
||||
|
||||
|
||||
@bp.route("/jobs/<int:job_id>/status")
@role_required("admin")
async def pseo_job_status(job_id: int):
    """Render the status-row partial for one job (polled by HTMX).

    Looks the task up by id and responds with a plain 404 when no row is
    found.
    """
    query = (
        "SELECT id, status, progress_current, progress_total, error, error_log,"
        " created_at, completed_at"
        " FROM tasks WHERE id = ?"
    )
    job = await fetch_one(query, (job_id,))
    if job:
        return await render_template("admin/pseo_job_status.html", job=job)
    return "Job not found", 404
|
||||
@@ -95,6 +95,12 @@
|
||||
Templates
|
||||
</a>
|
||||
|
||||
<div class="admin-sidebar__section">pSEO</div>
|
||||
<a href="{{ url_for('pseo.pseo_dashboard') }}" class="{% if admin_page == 'pseo' %}active{% endif %}">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M9.75 3.104v5.714a2.25 2.25 0 0 1-.659 1.591L5 14.5M9.75 3.104c-.251.023-.501.05-.75.082m.75-.082a24.301 24.301 0 0 1 4.5 0m0 0v5.714c0 .597.237 1.17.659 1.591L19.8 15.3M14.25 3.104c.251.023.501.05.75.082M19.8 15.3l-1.57.393A9.065 9.065 0 0 1 12 15a9.065 9.065 0 0 1-6.23-.693L5 14.5m14.8.8 1.402 1.402c1.232 1.232.65 3.318-1.067 3.611A48.309 48.309 0 0 1 12 21c-2.773 0-5.491-.235-8.135-.687-1.718-.293-2.3-2.379-1.067-3.61L5 14.5"/></svg>
|
||||
pSEO Engine
|
||||
</a>
|
||||
|
||||
<div class="admin-sidebar__section">Email</div>
|
||||
<a href="{{ url_for('admin.emails') }}" class="{% if admin_page == 'emails' %}active{% endif %}">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M21.75 6.75v10.5a2.25 2.25 0 0 1-2.25 2.25h-15a2.25 2.25 0 0 1-2.25-2.25V6.75m19.5 0A2.25 2.25 0 0 0 19.5 4.5h-15a2.25 2.25 0 0 0-2.25 2.25m19.5 0v.243a2.25 2.25 0 0 1-1.07 1.916l-7.5 4.615a2.25 2.25 0 0 1-2.36 0L3.32 8.91a2.25 2.25 0 0 1-1.07-1.916V6.75"/></svg>
|
||||
|
||||
195
web/src/padelnomics/admin/templates/admin/pseo_dashboard.html
Normal file
195
web/src/padelnomics/admin/templates/admin/pseo_dashboard.html
Normal file
@@ -0,0 +1,195 @@
|
||||
{% extends "admin/base_admin.html" %}
|
||||
{% set admin_page = "pseo" %}
|
||||
|
||||
{% block title %}pSEO Engine - {{ config.APP_NAME }}{% endblock %}
|
||||
|
||||
{% block admin_head %}
|
||||
<style>
|
||||
.pseo-status-badge {
|
||||
display: inline-flex; align-items: center; gap: 4px;
|
||||
font-size: 0.6875rem; font-weight: 600; padding: 2px 8px;
|
||||
border-radius: 9999px;
|
||||
}
|
||||
.pseo-status-fresh { background: #D1FAE5; color: #065F46; }
|
||||
.pseo-status-stale { background: #FEF3C7; color: #92400E; }
|
||||
.pseo-status-no_data { background: #F1F5F9; color: #64748B; }
|
||||
.pseo-status-no_articles { background: #EDE9FE; color: #5B21B6; }
|
||||
|
||||
.pseo-gaps-panel { border-top: 1px solid #E2E8F0; margin-top: 8px; padding-top: 8px; }
|
||||
|
||||
.progress-bar-wrap { height: 6px; background: #E2E8F0; border-radius: 9999px; overflow: hidden; min-width: 80px; }
|
||||
.progress-bar-fill { height: 100%; background: #1D4ED8; border-radius: 9999px; transition: width 0.3s; }
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
{% block admin_content %}
|
||||
<header class="flex justify-between items-center mb-8">
|
||||
<div>
|
||||
<h1 class="text-2xl">pSEO Engine</h1>
|
||||
<p class="text-slate text-sm mt-1">Operational dashboard for programmatic SEO</p>
|
||||
</div>
|
||||
<a href="{{ url_for('pseo.pseo_jobs') }}" class="btn-outline btn-sm">All Jobs</a>
|
||||
</header>
|
||||
|
||||
<!-- Summary Cards -->
|
||||
<div class="grid-4 mb-8">
|
||||
<div class="card text-center">
|
||||
<p class="card-header">Total Articles</p>
|
||||
<p class="text-3xl font-bold text-navy">{{ total_articles }}</p>
|
||||
<p class="text-xs text-slate mt-1">{{ total_published }} published</p>
|
||||
</div>
|
||||
<div class="card text-center">
|
||||
<p class="card-header">Templates</p>
|
||||
<p class="text-3xl font-bold text-navy">{{ total_templates }}</p>
|
||||
</div>
|
||||
<div class="card text-center">
|
||||
<p class="card-header">Stale Templates</p>
|
||||
<p class="text-3xl font-bold {% if stale_count > 0 %}text-amber-600{% else %}text-navy{% endif %}">
|
||||
{{ stale_count }}
|
||||
</p>
|
||||
<p class="text-xs text-slate mt-1">data newer than articles</p>
|
||||
</div>
|
||||
<div class="card text-center">
|
||||
<p class="card-header">Health Checks</p>
|
||||
<p class="text-3xl font-bold text-navy">—</p>
|
||||
<p class="text-xs text-slate mt-1">see Health section below</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Per-Template Table -->
|
||||
<div class="card mb-8">
|
||||
<div class="card-header mb-4 flex justify-between items-center">
|
||||
<span>Templates</span>
|
||||
<span class="text-xs text-slate">Click "Gaps" to load missing articles per template</span>
|
||||
</div>
|
||||
<div class="table-wrap">
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Template</th>
|
||||
<th>Data rows</th>
|
||||
<th>Articles EN</th>
|
||||
<th>Articles DE</th>
|
||||
<th>Freshness</th>
|
||||
<th>Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for r in template_rows %}
|
||||
{% set t = r.template %}
|
||||
{% set stats = r.stats %}
|
||||
{% set fr = r.freshness %}
|
||||
<tr>
|
||||
<td>
|
||||
<strong>{{ t.name }}</strong><br>
|
||||
<span class="text-xs text-slate">{{ t.slug }}</span>
|
||||
</td>
|
||||
<td>{{ fr.row_count if fr.row_count is not none else '—' }}</td>
|
||||
<td>{{ stats.by_language.get('en', {}).get('total', 0) }}</td>
|
||||
<td>{{ stats.by_language.get('de', {}).get('total', 0) }}</td>
|
||||
<td>
|
||||
{% set status = fr.status | default('no_data') %}
|
||||
<span class="pseo-status-badge pseo-status-{{ status }}">
|
||||
{% if status == 'fresh' %}🟢 Fresh
|
||||
{% elif status == 'stale' %}🟡 Stale
|
||||
{% elif status == 'no_articles' %}🟣 No articles
|
||||
{% else %}⚪ No data
|
||||
{% endif %}
|
||||
</span>
|
||||
</td>
|
||||
<td class="flex gap-2 items-center">
|
||||
<button class="btn-outline btn-sm"
|
||||
hx-get="{{ url_for('pseo.pseo_gaps_template', slug=t.slug) }}"
|
||||
hx-target="#gaps-panel-{{ t.slug }}"
|
||||
hx-swap="innerHTML"
|
||||
hx-indicator="#gaps-panel-{{ t.slug }}">
|
||||
Gaps
|
||||
</button>
|
||||
<form method="post" action="{{ url_for('pseo.pseo_generate_gaps', slug=t.slug) }}" class="m-0">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="btn btn-sm">Generate gaps</button>
|
||||
</form>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="6" class="p-0">
|
||||
<div id="gaps-panel-{{ t.slug }}" class="pseo-gaps-panel" style="padding: 0 1rem 0.5rem;">
|
||||
<!-- Loaded via HTMX on "Gaps" click -->
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Recent Jobs -->
|
||||
{% if jobs %}
|
||||
<div class="card mb-8">
|
||||
<div class="card-header mb-4 flex justify-between items-center">
|
||||
<span>Recent Generation Jobs</span>
|
||||
<a href="{{ url_for('pseo.pseo_jobs') }}" class="text-xs text-blue">View all →</a>
|
||||
</div>
|
||||
<div class="table-wrap">
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Job</th>
|
||||
<th>Status</th>
|
||||
<th>Progress</th>
|
||||
<th>Started</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for job in jobs %}
|
||||
<tr>
|
||||
<td>
|
||||
<a href="{{ url_for('pseo.pseo_jobs') }}#job-{{ job.id }}" class="text-blue">#{{ job.id }}</a>
|
||||
{% if job.payload %}
|
||||
— {{ (job.payload | fromjson).get('template_slug', '') }}
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.status == 'complete' %}
|
||||
<span class="badge-success">Complete</span>
|
||||
{% elif job.status == 'failed' %}
|
||||
<span class="badge-danger">Failed</span>
|
||||
{% elif job.status == 'pending' %}
|
||||
<span class="badge-warning">Running</span>
|
||||
{% else %}
|
||||
<span class="badge">{{ job.status }}</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.progress_total and job.progress_total > 0 %}
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="progress-bar-wrap">
|
||||
<div class="progress-bar-fill" style="width: {{ [((job.progress_current / job.progress_total) * 100) | int, 100] | min }}%"></div>
|
||||
</div>
|
||||
<span class="text-xs text-slate">{{ job.progress_current }}/{{ job.progress_total }}</span>
|
||||
</div>
|
||||
{% else %}
|
||||
—
|
||||
{% endif %}
|
||||
</td>
|
||||
<td class="text-xs text-slate">{{ job.created_at | default('') | truncate(16, True, '') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Health Issues (HTMX-loaded) -->
|
||||
<div id="health-panel"
|
||||
hx-get="{{ url_for('pseo.pseo_health') }}"
|
||||
hx-trigger="load delay:500ms"
|
||||
hx-target="#health-panel"
|
||||
hx-swap="outerHTML">
|
||||
<div class="card">
|
||||
<p class="text-slate text-sm">Loading health checks…</p>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
43
web/src/padelnomics/admin/templates/admin/pseo_gaps.html
Normal file
43
web/src/padelnomics/admin/templates/admin/pseo_gaps.html
Normal file
@@ -0,0 +1,43 @@
|
||||
{# HTMX partial — rendered inside the gaps panel for one template.
|
||||
Loaded via GET /admin/pseo/gaps/<slug>. #}
|
||||
|
||||
{% if not gaps %}
|
||||
<p class="text-success text-sm p-2">✓ No gaps — all {{ template.name }} rows have articles.</p>
|
||||
{% else %}
|
||||
<div class="flex justify-between items-center mb-2">
|
||||
<span class="text-sm font-semibold">{{ gaps | length }} missing row{{ 's' if gaps | length != 1 else '' }}</span>
|
||||
<form method="post" action="{{ url_for('pseo.pseo_generate_gaps', slug=template.slug) }}" class="m-0">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="btn btn-sm">Generate {{ gaps | length }} missing</button>
|
||||
</form>
|
||||
</div>
|
||||
<div class="table-wrap" style="max-height: 300px; overflow-y: auto;">
|
||||
<table class="table text-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>{{ template.natural_key }}</th>
|
||||
<th>Missing languages</th>
|
||||
{% for key in (gaps[0].keys() | list | reject('equalto', '_natural_key') | reject('equalto', '_missing_languages') | list)[:4] %}
|
||||
<th>{{ key }}</th>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for gap in gaps[:100] %}
|
||||
<tr>
|
||||
<td class="font-mono text-xs">{{ gap._natural_key }}</td>
|
||||
<td class="text-xs text-amber-700">{{ gap._missing_languages | join(', ') }}</td>
|
||||
{% for key in (gap.keys() | list | reject('equalto', '_natural_key') | reject('equalto', '_missing_languages') | list)[:4] %}
|
||||
<td class="text-xs text-slate">{{ gap[key] | truncate(30) if gap[key] is string else gap[key] }}</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if gaps | length > 100 %}
|
||||
<tr>
|
||||
<td colspan="10" class="text-xs text-slate text-center">… and {{ gaps | length - 100 }} more rows</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% endif %}
|
||||
99
web/src/padelnomics/admin/templates/admin/pseo_health.html
Normal file
99
web/src/padelnomics/admin/templates/admin/pseo_health.html
Normal file
@@ -0,0 +1,99 @@
|
||||
{# HTMX partial — loaded by pseo_dashboard.html and /admin/pseo/health directly.
|
||||
When loaded via HTMX (hx-swap="outerHTML"), renders a full card.
|
||||
When loaded standalone (full page), also works since it just outputs HTML. #}
|
||||
|
||||
<div class="card" id="health-panel">
|
||||
<div class="card-header mb-4 flex justify-between items-center">
|
||||
<span>Health Checks</span>
|
||||
<span class="text-xs text-slate">{{ health.counts.total }} issue{{ 's' if health.counts.total != 1 else '' }}</span>
|
||||
</div>
|
||||
|
||||
{% if health.counts.total == 0 %}
|
||||
<p class="text-success text-sm">✓ No issues found — all articles are healthy.</p>
|
||||
{% else %}
|
||||
|
||||
<!-- Hreflang Orphans -->
|
||||
{% if health.hreflang_orphans %}
|
||||
<details class="mb-4">
|
||||
<summary class="cursor-pointer font-semibold text-sm text-amber-700">
|
||||
⚠ Hreflang orphans ({{ health.counts.hreflang_orphans }})
|
||||
<span class="text-xs font-normal text-slate ml-2">— articles missing a sibling language</span>
|
||||
</summary>
|
||||
<div class="table-wrap mt-2">
|
||||
<table class="table text-sm">
|
||||
<thead><tr><th>Template</th><th>URL path</th><th>Present</th><th>Missing</th></tr></thead>
|
||||
<tbody>
|
||||
{% for o in health.hreflang_orphans[:50] %}
|
||||
<tr>
|
||||
<td class="text-xs text-slate">{{ o.template_slug }}</td>
|
||||
<td><a href="{{ o.url_path }}" class="text-blue text-xs" target="_blank">{{ o.url_path }}</a></td>
|
||||
<td class="text-xs">{{ o.present_languages | join(', ') }}</td>
|
||||
<td class="text-xs text-red-600">{{ o.missing_languages | join(', ') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if health.hreflang_orphans | length > 50 %}
|
||||
<tr><td colspan="4" class="text-xs text-slate text-center">… and {{ health.hreflang_orphans | length - 50 }} more</td></tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</details>
|
||||
{% endif %}
|
||||
|
||||
<!-- Missing Build Files -->
|
||||
{% if health.missing_build_files %}
|
||||
<details class="mb-4">
|
||||
<summary class="cursor-pointer font-semibold text-sm text-red-700">
|
||||
❌ Missing build files ({{ health.counts.missing_build_files }})
|
||||
<span class="text-xs font-normal text-slate ml-2">— published articles with no HTML on disk</span>
|
||||
</summary>
|
||||
<div class="table-wrap mt-2">
|
||||
<table class="table text-sm">
|
||||
<thead><tr><th>Slug</th><th>Language</th><th>URL path</th><th>Expected path</th></tr></thead>
|
||||
<tbody>
|
||||
{% for m in health.missing_build_files[:50] %}
|
||||
<tr>
|
||||
<td class="text-xs font-mono">{{ m.slug }}</td>
|
||||
<td class="text-xs">{{ m.language }}</td>
|
||||
<td class="text-xs"><a href="{{ m.url_path }}" class="text-blue" target="_blank">{{ m.url_path }}</a></td>
|
||||
<td class="text-xs text-slate font-mono">{{ m.expected_path }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if health.missing_build_files | length > 50 %}
|
||||
<tr><td colspan="4" class="text-xs text-slate text-center">… and {{ health.missing_build_files | length - 50 }} more</td></tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</details>
|
||||
{% endif %}
|
||||
|
||||
<!-- Broken Scenario Refs -->
|
||||
{% if health.broken_scenario_refs %}
|
||||
<details class="mb-4">
|
||||
<summary class="cursor-pointer font-semibold text-sm text-red-700">
|
||||
❌ Broken scenario refs ({{ health.counts.broken_scenario_refs }})
|
||||
<span class="text-xs font-normal text-slate ml-2">— [scenario:slug] markers referencing deleted scenarios</span>
|
||||
</summary>
|
||||
<div class="table-wrap mt-2">
|
||||
<table class="table text-sm">
|
||||
<thead><tr><th>Slug</th><th>Language</th><th>Broken refs</th></tr></thead>
|
||||
<tbody>
|
||||
{% for b in health.broken_scenario_refs[:50] %}
|
||||
<tr>
|
||||
<td class="text-xs font-mono">{{ b.slug }}</td>
|
||||
<td class="text-xs">{{ b.language }}</td>
|
||||
<td class="text-xs text-red-600 font-mono">{{ b.broken_scenario_refs | join(', ') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if health.broken_scenario_refs | length > 50 %}
|
||||
<tr><td colspan="3" class="text-xs text-slate text-center">… and {{ health.broken_scenario_refs | length - 50 }} more</td></tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</details>
|
||||
{% endif %}
|
||||
|
||||
{% endif %}
|
||||
</div>
|
||||
@@ -0,0 +1,45 @@
|
||||
{# HTMX partial — replaces the entire <tr> for a job row while it's running.
|
||||
Stops polling once the job is complete or failed (hx-trigger="every 2s" only applies
|
||||
while this partial keeps returning a polling trigger). #}
|
||||
|
||||
{% set pct = [((job.progress_current / job.progress_total) * 100) | int, 100] | min if job.progress_total else 0 %}
|
||||
|
||||
<tr id="job-{{ job.id }}"
|
||||
{% if job.status == 'pending' %}
|
||||
hx-get="{{ url_for('pseo.pseo_job_status', job_id=job.id) }}"
|
||||
hx-trigger="every 2s"
|
||||
hx-target="this"
|
||||
hx-swap="outerHTML"
|
||||
{% endif %}>
|
||||
<td class="text-xs text-slate">#{{ job.id }}</td>
|
||||
<td>—</td>{# payload not re-fetched in status endpoint — static display #}
|
||||
<td>
|
||||
{% if job.status == 'complete' %}
|
||||
<span class="badge-success">Complete</span>
|
||||
{% elif job.status == 'failed' %}
|
||||
<span class="badge-danger">Failed</span>
|
||||
{% else %}
|
||||
<span class="badge-warning">Running…</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.progress_total and job.progress_total > 0 %}
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="progress-bar-wrap" style="min-width:120px;">
|
||||
<div class="progress-bar-fill" style="width: {{ pct }}%"></div>
|
||||
</div>
|
||||
<span class="text-xs text-slate">{{ job.progress_current }}/{{ job.progress_total }}</span>
|
||||
</div>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
<td class="text-xs text-slate">{{ (job.created_at or '') | truncate(19, True, '') }}</td>
|
||||
<td class="text-xs text-slate">{{ (job.completed_at or '') | truncate(19, True, '') }}</td>
|
||||
<td>
|
||||
{% if job.error %}
|
||||
<details>
|
||||
<summary class="text-xs text-red-600 cursor-pointer">Error</summary>
|
||||
<pre class="text-xs mt-1 p-2 bg-gray-50 rounded overflow-auto max-w-xs">{{ job.error[:500] }}</pre>
|
||||
</details>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
95
web/src/padelnomics/admin/templates/admin/pseo_jobs.html
Normal file
95
web/src/padelnomics/admin/templates/admin/pseo_jobs.html
Normal file
@@ -0,0 +1,95 @@
|
||||
{% extends "admin/base_admin.html" %}
|
||||
{% set admin_page = "pseo" %}
|
||||
|
||||
{% block title %}pSEO Jobs - {{ config.APP_NAME }}{% endblock %}
|
||||
|
||||
{% block admin_head %}
|
||||
<style>
|
||||
.progress-bar-wrap { height: 6px; background: #E2E8F0; border-radius: 9999px; overflow: hidden; min-width: 120px; }
|
||||
.progress-bar-fill { height: 100%; background: #1D4ED8; border-radius: 9999px; transition: width 0.3s; }
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
{% block admin_content %}
|
||||
<header class="flex justify-between items-center mb-8">
|
||||
<div>
|
||||
<h1 class="text-2xl">Generation Jobs</h1>
|
||||
<p class="text-slate text-sm mt-1">Recent article generation runs</p>
|
||||
</div>
|
||||
<a href="{{ url_for('pseo.pseo_dashboard') }}" class="btn-outline btn-sm">← pSEO Engine</a>
|
||||
</header>
|
||||
|
||||
{% if not jobs %}
|
||||
<div class="card">
|
||||
<p class="text-slate text-sm">No generation jobs found. Use the pSEO Engine dashboard to generate articles.</p>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="card">
|
||||
<div class="table-wrap">
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>#</th>
|
||||
<th>Template</th>
|
||||
<th>Status</th>
|
||||
<th>Progress</th>
|
||||
<th>Started</th>
|
||||
<th>Completed</th>
|
||||
<th>Error</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for job in jobs %}
|
||||
<tr id="job-{{ job.id }}">
|
||||
<td class="text-xs text-slate">#{{ job.id }}</td>
|
||||
<td>
|
||||
{% if job.payload %}
|
||||
{% set payload = job.payload | fromjson %}
|
||||
<span class="font-mono text-xs">{{ payload.get('template_slug', '—') }}</span>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.status == 'complete' %}
|
||||
<span class="badge-success">Complete</span>
|
||||
{% elif job.status == 'failed' %}
|
||||
<span class="badge-danger">Failed</span>
|
||||
{% elif job.status == 'pending' %}
|
||||
{# Poll live status for running jobs #}
|
||||
<div hx-get="{{ url_for('pseo.pseo_job_status', job_id=job.id) }}"
|
||||
hx-trigger="load, every 2s"
|
||||
hx-target="closest tr"
|
||||
hx-swap="outerHTML">
|
||||
<span class="badge-warning">Running…</span>
|
||||
</div>
|
||||
{% else %}
|
||||
<span class="badge">{{ job.status }}</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.progress_total and job.progress_total > 0 %}
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="progress-bar-wrap">
|
||||
<div class="progress-bar-fill" style="width: {{ [((job.progress_current / job.progress_total) * 100) | int, 100] | min }}%"></div>
|
||||
</div>
|
||||
<span class="text-xs text-slate">{{ job.progress_current }}/{{ job.progress_total }}</span>
|
||||
</div>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
<td class="text-xs text-slate">{{ (job.created_at or '') | truncate(19, True, '') }}</td>
|
||||
<td class="text-xs text-slate">{{ (job.completed_at or '') | truncate(19, True, '') }}</td>
|
||||
<td>
|
||||
{% if job.error %}
|
||||
<details>
|
||||
<summary class="text-xs text-red-600 cursor-pointer">Error</summary>
|
||||
<pre class="text-xs mt-1 p-2 bg-gray-50 rounded overflow-auto max-w-xs">{{ job.error[:500] }}</pre>
|
||||
</details>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
@@ -1,13 +1,22 @@
|
||||
"""
|
||||
Padelnomics - Application factory and entry point.
|
||||
"""
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from quart import Quart, Response, abort, g, redirect, request, session, url_for
|
||||
|
||||
from .analytics import close_analytics_db, open_analytics_db
|
||||
from .core import close_db, config, get_csrf_token, init_db, is_flag_enabled, setup_logging, setup_request_id
|
||||
from .core import (
|
||||
close_db,
|
||||
config,
|
||||
get_csrf_token,
|
||||
init_db,
|
||||
is_flag_enabled,
|
||||
setup_logging,
|
||||
setup_request_id,
|
||||
)
|
||||
|
||||
setup_logging()
|
||||
from .i18n import LANG_BLUEPRINTS, SUPPORTED_LANGS, get_country_name, get_translations
|
||||
@@ -97,6 +106,7 @@ def create_app() -> Quart:
|
||||
app.jinja_env.filters["fmt_n"] = _fmt_n
|
||||
app.jinja_env.filters["tformat"] = _tformat # translate with placeholders: {{ t.key | tformat(count=n) }}
|
||||
app.jinja_env.filters["country_name"] = get_country_name # {{ article.country | country_name(lang) }}
|
||||
app.jinja_env.filters["fromjson"] = json.loads # {{ job.payload | fromjson }}
|
||||
|
||||
# Session config
|
||||
app.config["SESSION_COOKIE_SECURE"] = not config.DEBUG
|
||||
@@ -303,6 +313,7 @@ def create_app() -> Quart:
|
||||
# Blueprint registration
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
from .admin.pseo_routes import bp as pseo_bp
|
||||
from .admin.routes import bp as admin_bp
|
||||
from .auth.routes import bp as auth_bp
|
||||
from .billing.routes import bp as billing_bp
|
||||
@@ -327,6 +338,7 @@ def create_app() -> Quart:
|
||||
app.register_blueprint(dashboard_bp)
|
||||
app.register_blueprint(billing_bp)
|
||||
app.register_blueprint(admin_bp)
|
||||
app.register_blueprint(pseo_bp)
|
||||
app.register_blueprint(webhooks_bp)
|
||||
|
||||
# Content catch-all LAST — lives under /<lang> too
|
||||
|
||||
@@ -284,6 +284,7 @@ async def generate_articles(
|
||||
*,
|
||||
limit: int = 500,
|
||||
base_url: str = "https://padelnomics.io",
|
||||
task_id: int | None = None,
|
||||
) -> int:
|
||||
"""
|
||||
Generate articles from a git template + DuckDB data.
|
||||
@@ -297,8 +298,14 @@ async def generate_articles(
|
||||
- write HTML to disk
|
||||
- upsert article row in SQLite
|
||||
|
||||
Returns count of articles generated.
|
||||
If task_id is given, writes progress_current / progress_total / error_log
|
||||
to the tasks table every _PROGRESS_BATCH articles so the pSEO dashboard
|
||||
can show a live progress bar. Per-article errors are logged and collected
|
||||
rather than aborting the run — the full task still completes.
|
||||
|
||||
Returns count of articles generated (excluding per-article errors).
|
||||
"""
|
||||
from ..core import execute as db_execute
|
||||
from ..planner.calculator import DEFAULTS, calc, validate_state
|
||||
from .routes import bake_scenario_cards, is_reserved_path
|
||||
|
||||
@@ -330,6 +337,15 @@ async def generate_articles(
|
||||
t_calc = t_render = t_bake = 0.0
|
||||
|
||||
_BATCH_SIZE = 200
|
||||
_PROGRESS_BATCH = 50 # write task progress every N articles (avoid write amplification)
|
||||
|
||||
# Write progress_total before the loop so the dashboard can show 0/N immediately.
|
||||
if task_id is not None:
|
||||
total = len(rows) * len(config["languages"])
|
||||
await db_execute(
|
||||
"UPDATE tasks SET progress_total = ? WHERE id = ?",
|
||||
(total, task_id),
|
||||
)
|
||||
|
||||
async with transaction() as db:
|
||||
for row in rows:
|
||||
@@ -515,12 +531,27 @@ async def generate_articles(
|
||||
elif generated % 25 == 0:
|
||||
logger.info("%s: %d articles written…", slug, generated)
|
||||
|
||||
# Write progress every _PROGRESS_BATCH articles so the pSEO
|
||||
# dashboard live-updates without excessive write amplification.
|
||||
if task_id is not None and generated % _PROGRESS_BATCH == 0:
|
||||
await db_execute(
|
||||
"UPDATE tasks SET progress_current = ? WHERE id = ?",
|
||||
(generated, task_id),
|
||||
)
|
||||
|
||||
# Stagger dates
|
||||
published_today += 1
|
||||
if published_today >= articles_per_day:
|
||||
published_today = 0
|
||||
publish_date += timedelta(days=1)
|
||||
|
||||
# Write final progress so the dashboard shows 100% on completion.
|
||||
if task_id is not None:
|
||||
await db_execute(
|
||||
"UPDATE tasks SET progress_current = ? WHERE id = ?",
|
||||
(generated, task_id),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"%s: done — %d total | calc=%.1fs render=%.1fs bake=%.1fs",
|
||||
slug, generated, t_calc, t_render, t_bake,
|
||||
|
||||
397
web/src/padelnomics/content/health.py
Normal file
397
web/src/padelnomics/content/health.py
Normal file
@@ -0,0 +1,397 @@
|
||||
"""
|
||||
pSEO Engine health checks and content gap queries.
|
||||
|
||||
All functions are async, pure queries — no side effects.
|
||||
Used by the pSEO Engine admin dashboard.
|
||||
|
||||
Functions overview:
|
||||
get_template_stats() — article counts per status/language for one template
|
||||
get_template_freshness() — compare _serving_meta.json timestamp vs last article generation
|
||||
get_content_gaps() — DuckDB rows with no matching article for a template+language
|
||||
check_hreflang_orphans() — published articles missing a sibling language
|
||||
check_missing_build_files()— published articles whose HTML file is absent from disk
|
||||
check_broken_scenario_refs()— articles referencing [scenario:slug] that doesn't exist
|
||||
get_all_health_issues() — run all checks, return counts + details
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from ..analytics import fetch_analytics
|
||||
from ..core import fetch_all
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Directory where generate_articles() writes HTML + markdown source files.
|
||||
BUILD_DIR = Path("data/content/_build")
|
||||
|
||||
# Pattern matching [scenario:slug] and [scenario:slug:section] markers.
|
||||
_SCENARIO_REF_RE = re.compile(r"\[scenario:([a-z0-9_-]+)(?::[a-z]+)?\]")
|
||||
|
||||
|
||||
def _validate_table_name(data_table: str) -> None:
    """Guard against SQL injection in table names.

    Accepts only lowercase identifiers made of letters, digits, underscores
    and dots (so schema-qualified names such as "serving.pseo_city_costs_de"
    pass). Anything else is rejected before the name can be interpolated
    into SQL text.

    Raises:
        AssertionError: if ``data_table`` is not a plain identifier. Raised
            explicitly rather than via an ``assert`` statement so the guard
            is not stripped when Python runs with ``-O``.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let "name\n" slip through.
    if re.fullmatch(r"[a-z_][a-z0-9_.]*", data_table) is None:
        raise AssertionError(f"Invalid table name: {data_table}")
|
||||
|
||||
|
||||
def _read_serving_meta() -> dict:
    """Read _serving_meta.json written by export_serving.py.

    The file is looked up in the directory that holds the serving DuckDB
    file, whose path is taken from the SERVING_DUCKDB_PATH environment
    variable (default "data/analytics.duckdb").

    Returns:
        The parsed JSON object, or {} when the file is absent, unreadable,
        undecodable, not valid JSON, or does not contain a JSON object.
    """
    serving_path = os.environ.get("SERVING_DUCKDB_PATH", "data/analytics.duckdb")
    meta_path = Path(serving_path).parent / "_serving_meta.json"
    try:
        meta = json.loads(meta_path.read_text())
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file (no separate exists()
        # check needed). ValueError covers json.JSONDecodeError as well as
        # UnicodeDecodeError from undecodable bytes.
        return {}
    # Callers call .get() on the result, so only a JSON object is usable.
    return meta if isinstance(meta, dict) else {}
|
||||
|
||||
|
||||
def _parse_dt(s: str | None) -> datetime | None:
|
||||
"""Parse an ISO datetime string to a naive UTC datetime. Returns None on failure."""
|
||||
if not s:
|
||||
return None
|
||||
try:
|
||||
dt = datetime.fromisoformat(s)
|
||||
# Strip timezone info so both aware (from meta) and naive (from SQLite) compare cleanly.
|
||||
return dt.replace(tzinfo=None)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
# ── Template statistics ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def get_template_stats(template_slug: str) -> dict:
    """Summarise article counts for one template.

    Runs a single grouped query over ``articles`` and folds each
    (status, language) bucket into an overall tally and a per-language tally.
    Every bucket counts toward "total"; a status is only broken out when the
    tally already has a key for it.

    Returns:
        {
            "total": N,
            "published": N,
            "draft": N,
            "scheduled": N,
            "by_language": {"en": {"total": N, "published": N, ...}, ...},
        }
    """

    def _empty_tally() -> dict:
        return {"total": 0, "published": 0, "draft": 0, "scheduled": 0}

    grouped = await fetch_all(
        "SELECT status, language, COUNT(*) as cnt FROM articles"
        " WHERE template_slug = ? GROUP BY status, language",
        (template_slug,),
    )

    stats: dict = {**_empty_tally(), "by_language": {}}
    per_language = stats["by_language"]
    for bucket in grouped:
        status, lang, n = bucket["status"], bucket["language"], bucket["cnt"]
        lang_tally = per_language.setdefault(lang, _empty_tally())
        # Apply the same update to the overall and the per-language tally.
        for tally in (stats, lang_tally):
            tally["total"] += n
            if status in tally:
                tally[status] += n

    return stats
|
||||
|
||||
|
||||
# ── Data freshness ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def get_template_freshness(templates: list[dict]) -> list[dict]:
    """Classify each template by comparing data export time with article age.

    The export timestamp comes from ``exported_at_utc`` in
    _serving_meta.json; the generation timestamp is the newest
    COALESCE(updated_at, created_at) among the template's articles.

    Returns one dict per template, in input order:
        {
            "slug": str,
            "name": str,                    # falls back to the slug
            "data_table": str,
            "exported_at_utc": str | None,  # raw value from the meta file
            "last_generated": str | None,   # raw value from the articles query
            "row_count": int | None,        # from the meta file's "tables" entry
            "status": "fresh" | "stale" | "no_articles" | "no_data",
        }

    Status rules, evaluated in this order:
        "no_data"     - the meta file has no exported_at_utc value
        "no_articles" - no parsable generation timestamp for the template
        "stale"       - the export is newer than the last generation
        "fresh"       - everything else (including an export timestamp that
                        is present but cannot be parsed)
    """
    meta = _read_serving_meta()
    exported_raw = meta.get("exported_at_utc")
    exported_dt = _parse_dt(exported_raw)
    tables = meta.get("tables", {})

    report = []
    for tpl in templates:
        slug = tpl["slug"]
        data_table = tpl.get("data_table", "")

        # The meta file keys tables without their schema prefix,
        # e.g. "serving.pseo_city_costs_de" -> "pseo_city_costs_de".
        if "." in data_table:
            table_key = data_table.split(".")[-1]
        else:
            table_key = data_table
        row_count = tables.get(table_key, {}).get("row_count")

        latest = await fetch_all(
            "SELECT MAX(COALESCE(updated_at, created_at)) as last_gen FROM articles"
            " WHERE template_slug = ?",
            (slug,),
        )
        last_gen_raw = latest[0]["last_gen"] if latest else None
        last_gen_dt = _parse_dt(last_gen_raw)

        if not exported_raw:
            status = "no_data"
        elif last_gen_dt is None:
            status = "no_articles"
        elif exported_dt is not None and exported_dt > last_gen_dt:
            # Newer data exists than what the articles were generated from.
            status = "stale"
        else:
            status = "fresh"

        report.append(
            {
                "slug": slug,
                "name": tpl.get("name", slug),
                "data_table": data_table,
                "exported_at_utc": exported_raw,
                "last_generated": last_gen_raw,
                "row_count": row_count,
                "status": status,
            }
        )

    return report
|
||||
|
||||
|
||||
# ── Content gaps ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def get_content_gaps(
    template_slug: str,
    data_table: str,
    natural_key: str,
    languages: list[str],
    limit: int = 200,
) -> list[dict]:
    """Find source rows that lack an article in at least one language.

    An article is matched to a source row purely by slug, which is expected
    to have the form "{template_slug}-{lang}-{natural_key_value}". This
    avoids rendering URL patterns just to detect gaps.

    Note that ``limit`` caps the number of source rows fetched, not the
    number of gaps returned: rows beyond the limit are never inspected.

    Args:
        template_slug: Template whose articles are checked.
        data_table: Source table name; validated before being placed in SQL.
        natural_key: Column whose value identifies a row within the template.
        languages: Languages every row is expected to have; must be non-empty.
        limit: Maximum number of source rows to fetch.

    Returns:
        The source rows with a gap, each extended with:
            "_natural_key": str             - the row's natural key value
            "_missing_languages": list[str] - languages without an article
    """
    assert languages, "languages must not be empty"
    _validate_table_name(data_table)

    article_rows = await fetch_all(
        "SELECT slug, language FROM articles WHERE template_slug = ?",
        (template_slug,),
    )

    # (language, natural key value) pairs that already have an article.
    lang_prefixes = {lang: f"{template_slug}-{lang}-" for lang in languages}
    covered: set[tuple[str, str]] = set()
    for rec in article_rows:
        prefix = lang_prefixes.get(rec["language"])
        if prefix is None or not rec["slug"].startswith(prefix):
            continue
        covered.add((rec["language"], rec["slug"][len(prefix):]))

    source_rows = await fetch_analytics(
        f"SELECT * FROM {data_table} LIMIT ?",
        [limit],
    )

    gaps = []
    for src in source_rows:
        key_val = str(src.get(natural_key, ""))
        absent = [lang for lang in languages if (lang, key_val) not in covered]
        if absent:
            gaps.append({**src, "_natural_key": key_val, "_missing_languages": absent})

    return gaps
|
||||
|
||||
|
||||
# ── Health checks ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def check_hreflang_orphans(templates: list[dict]) -> list[dict]:
    """Find published articles that lack a sibling in an expected language.

    Example: a template configured for EN + DE where the EN article is
    published but the DE one is not.

    Published articles are grouped by the natural key parsed from their slug,
    which must follow "{template_slug}-{lang}-{natural_key}"; slugs that do
    not match are ignored. A group is reported when any language configured
    on the template (default ["en"]) is absent from it. Templates with fewer
    than two languages are skipped.

    Returns list of dicts:
        {
            "template_slug": str,
            "url_path": str,  # url_path of the first article seen in the group
            "present_languages": list[str],
            "missing_languages": list[str],
        }
    """
    orphans = []
    for tpl in templates:
        expected_langs = set(tpl.get("languages", ["en"]))
        if len(expected_langs) < 2:
            # A single-language template cannot have orphans.
            continue

        published = await fetch_all(
            "SELECT slug, language, url_path FROM articles"
            " WHERE template_slug = ? AND status = 'published'",
            (tpl["slug"],),
        )

        template_prefix = tpl["slug"] + "-"
        groups: dict[str, dict] = {}  # natural key -> {"langs": set, "url_path": str}
        for rec in published:
            slug, lang = rec["slug"], rec["language"]
            if not slug.startswith(template_prefix):
                continue
            tail = slug[len(template_prefix):]  # "{lang}-{natural_key}"
            lang_tag = lang + "-"
            if not tail.startswith(lang_tag):
                continue
            natural_key = tail[len(lang_tag):]
            group = groups.setdefault(
                natural_key, {"langs": set(), "url_path": rec["url_path"]}
            )
            group["langs"].add(lang)

        for group in groups.values():
            absent = sorted(expected_langs - group["langs"])
            if not absent:
                continue
            orphans.append(
                {
                    "template_slug": tpl["slug"],
                    "url_path": group["url_path"],
                    "present_languages": sorted(group["langs"]),
                    "missing_languages": absent,
                }
            )

    return orphans
|
||||
|
||||
|
||||
async def check_missing_build_files(build_dir: Path | None = None) -> list[dict]:
    """Find published articles whose rendered HTML file is not on disk.

    Each article is expected at {build_dir}/{language}/{slug}.html, where
    build_dir defaults to BUILD_DIR.

    Returns list of dicts:
        {"id", "slug", "language", "url_path", "template_slug", "expected_path"}
    """
    root = build_dir or BUILD_DIR
    published = await fetch_all(
        "SELECT id, slug, language, url_path, template_slug FROM articles"
        " WHERE status = 'published'",
    )

    copied_fields = ("id", "slug", "language", "url_path", "template_slug")
    absent = []
    for rec in published:
        expected = root / rec["language"] / f"{rec['slug']}.html"
        if expected.exists():
            continue
        entry = {field: rec[field] for field in copied_fields}
        entry["expected_path"] = str(expected)
        absent.append(entry)
    return absent
|
||||
|
||||
|
||||
async def check_broken_scenario_refs(build_dir: Path | None = None) -> list[dict]:
    """Find pSEO articles whose [scenario:slug] markers point at nothing.

    Only published articles with a template_slug are inspected. Their
    markdown source is read from {build_dir}/{language}/md/{slug}.md
    (build_dir defaults to BUILD_DIR); articles without a source file are
    skipped. A reference is broken when its slug is not in
    published_scenarios.

    Returns list of dicts:
        {"id", "slug", "language", "url_path", "broken_scenario_refs": [str, ...]}
    """
    root = build_dir or BUILD_DIR

    known_scenarios = {
        rec["slug"] for rec in await fetch_all("SELECT slug FROM published_scenarios")
    }

    candidates = await fetch_all(
        "SELECT id, slug, language, url_path FROM articles"
        " WHERE status = 'published' AND template_slug IS NOT NULL",
    )

    findings = []
    for art in candidates:
        source = root / art["language"] / "md" / f"{art['slug']}.md"
        if not source.exists():
            continue
        # The pattern has a single capturing group, so findall() yields the
        # referenced scenario slugs directly.
        cited = set(_SCENARIO_REF_RE.findall(source.read_text()))
        dangling = sorted(cited - known_scenarios)
        if dangling:
            findings.append(
                {
                    "id": art["id"],
                    "slug": art["slug"],
                    "language": art["language"],
                    "url_path": art["url_path"],
                    "broken_scenario_refs": dangling,
                }
            )

    return findings
|
||||
|
||||
|
||||
# ── Aggregate check ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def get_all_health_issues(
    templates: list[dict],
    build_dir: Path | None = None,
) -> dict:
    """Run every health check and bundle the results with their counts.

    The checks run one after another: hreflang orphans, missing build files,
    then broken scenario references.

    Returns:
        {
            "hreflang_orphans": [...],
            "missing_build_files": [...],
            "broken_scenario_refs": [...],
            "counts": {
                "hreflang_orphans": N,
                "missing_build_files": N,
                "broken_scenario_refs": N,
                "total": N,
            },
        }
    """
    findings = {
        "hreflang_orphans": await check_hreflang_orphans(templates),
        "missing_build_files": await check_missing_build_files(build_dir),
        "broken_scenario_refs": await check_broken_scenario_refs(build_dir),
    }

    counts = {check: len(items) for check, items in findings.items()}
    counts["total"] = sum(counts.values())

    return {**findings, "counts": counts}
|
||||
@@ -0,0 +1,18 @@
|
||||
"""Add progress tracking columns to the tasks table.
|
||||
|
||||
Enables the pSEO Engine dashboard to show live progress during article
|
||||
generation jobs: a progress bar (current/total) and an error log for
|
||||
per-article failures without aborting the whole run.
|
||||
"""
|
||||
|
||||
|
||||
def up(conn) -> None:
    """Add progress_current, progress_total and error_log to ``tasks``.

    All three columns are NOT NULL with a default (0, 0 and '[]'), so rows
    that already exist receive those defaults.
    """
    statements = (
        "ALTER TABLE tasks ADD COLUMN progress_current INTEGER NOT NULL DEFAULT 0",
        "ALTER TABLE tasks ADD COLUMN progress_total INTEGER NOT NULL DEFAULT 0",
        "ALTER TABLE tasks ADD COLUMN error_log TEXT NOT NULL DEFAULT '[]'",
    )
    for ddl in statements:
        conn.execute(ddl)
|
||||
@@ -4,11 +4,10 @@ Background task worker - SQLite-based queue (no Redis needed).
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import logging
|
||||
|
||||
from .core import (
|
||||
EMAIL_ADDRESSES,
|
||||
config,
|
||||
@@ -754,8 +753,11 @@ async def handle_generate_articles(payload: dict) -> None:
|
||||
start_date = date_cls.fromisoformat(payload["start_date"])
|
||||
articles_per_day = payload.get("articles_per_day", 3)
|
||||
limit = payload.get("limit", 500)
|
||||
task_id = payload.get("_task_id")
|
||||
|
||||
count = await generate_articles(slug, start_date, articles_per_day, limit=limit)
|
||||
count = await generate_articles(
|
||||
slug, start_date, articles_per_day, limit=limit, task_id=task_id
|
||||
)
|
||||
logger.info("Generated %s articles for template '%s'", count, slug)
|
||||
|
||||
|
||||
@@ -777,6 +779,9 @@ async def process_task(task: dict) -> None:
|
||||
|
||||
try:
|
||||
payload = json.loads(task["payload"]) if task["payload"] else {}
|
||||
# Inject task_id so progress-aware handlers (e.g. generate_articles) can
|
||||
# write progress_current to the tasks table without a separate lookup.
|
||||
payload["_task_id"] = task_id
|
||||
await handler(payload)
|
||||
await mark_complete(task_id)
|
||||
logger.info("Completed: %s (id=%s)", task_name, task_id)
|
||||
|
||||
Reference in New Issue
Block a user