feat: SEO/GEO admin hub — migration, sync module, routes, templates

- Migration 0019: seo_search_metrics, seo_analytics_metrics, seo_sync_log tables
- seo/ module: GSC, Bing, Umami sync + query functions (search perf, funnel, scorecard)
- Admin routes: /admin/seo hub with HTMX tabs + manual sync trigger
- Admin templates: hub page, search/funnel/scorecard partials, sidebar nav entry
- Worker: sync_gsc, sync_bing, sync_umami, cleanup_seo_metrics tasks + daily scheduler
- Config: GSC_SERVICE_ACCOUNT_PATH, GSC_SITE_URL, BING_WEBMASTER_API_KEY, BING_SITE_URL
- Deps: httpx, google-api-python-client, google-auth

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-23 15:00:36 +01:00
parent afd46398af
commit ccf03db9a3
15 changed files with 1583 additions and 2 deletions

View File

@@ -17,6 +17,9 @@ dependencies = [
"weasyprint>=68.1",
"duckdb>=1.0.0",
"pyyaml>=6.0",
"httpx>=0.27.0",
"google-api-python-client>=2.100.0",
"google-auth>=2.23.0",
]
[build-system]

View File

@@ -1401,3 +1401,134 @@ async def _rebuild_article(article_id: int):
body_html = await bake_scenario_cards(body_html, lang=lang)
BUILD_DIR.mkdir(parents=True, exist_ok=True)
(BUILD_DIR / f"{article['slug']}.html").write_text(body_html)
# =============================================================================
# SEO Hub
# =============================================================================
@bp.route("/seo")
@role_required("admin")
async def seo():
    """SEO metrics hub — overview + tabs for search, funnel, scorecard.

    Reads the optional ``days`` query parameter (default 28, clamped to
    1-730) and renders ``admin/seo.html`` with the search-performance
    overview and the latest sync status.
    """
    from ..seo import get_search_performance, get_sync_status

    # A non-numeric ?days= value must not turn into a 500; use the default.
    try:
        date_range_days = int(request.args.get("days", "28") or "28")
    except ValueError:
        date_range_days = 28
    date_range_days = max(1, min(date_range_days, 730))

    overview = await get_search_performance(date_range_days=date_range_days)
    sync_status = await get_sync_status()
    return await render_template(
        "admin/seo.html",
        overview=overview,
        sync_status=sync_status,
        date_range_days=date_range_days,
    )
@bp.route("/seo/search")
@role_required("admin")
async def seo_search():
    """HTMX partial: search performance tab.

    Query parameters: ``days`` (default 28, clamped to 1-730) and an
    optional ``source`` filter that is applied to the top-queries and
    top-pages lookups; an empty value means "all sources".
    """
    from ..seo import (
        get_country_breakdown,
        get_device_breakdown,
        get_top_pages,
        get_top_queries,
    )

    # A non-numeric ?days= value must not turn into a 500; use the default.
    try:
        days = int(request.args.get("days", "28") or "28")
    except ValueError:
        days = 28
    days = max(1, min(days, 730))
    source = request.args.get("source", "") or None

    queries = await get_top_queries(date_range_days=days, source=source)
    pages = await get_top_pages(date_range_days=days, source=source)
    countries = await get_country_breakdown(date_range_days=days)
    devices = await get_device_breakdown(date_range_days=days)
    return await render_template(
        "admin/partials/seo_search.html",
        queries=queries,
        pages=pages,
        countries=countries,
        devices=devices,
        date_range_days=days,
        current_source=source,
    )
@bp.route("/seo/funnel")
@role_required("admin")
async def seo_funnel():
    """HTMX partial: full funnel view.

    Reads the optional ``days`` query parameter (default 28, clamped to
    1-730) and renders ``admin/partials/seo_funnel.html``.
    """
    from ..seo import get_funnel_metrics

    # A non-numeric ?days= value must not turn into a 500; use the default.
    try:
        days = int(request.args.get("days", "28") or "28")
    except ValueError:
        days = 28
    days = max(1, min(days, 730))

    funnel = await get_funnel_metrics(date_range_days=days)
    return await render_template(
        "admin/partials/seo_funnel.html",
        funnel=funnel,
        date_range_days=days,
    )
@bp.route("/seo/scorecard")
@role_required("admin")
async def seo_scorecard():
    """HTMX partial: article scorecard.

    Query parameters: ``days`` (default 28, clamped to 1-730), optional
    ``template_slug`` / ``country`` / ``language`` filters (empty means no
    filter), ``sort`` (default "impressions") and ``dir`` (default "desc").
    """
    from ..seo import get_article_scorecard

    # A non-numeric ?days= value must not turn into a 500; use the default.
    try:
        days = int(request.args.get("days", "28") or "28")
    except ValueError:
        days = 28
    days = max(1, min(days, 730))

    template_slug = request.args.get("template_slug", "") or None
    country_filter = request.args.get("country", "") or None
    language = request.args.get("language", "") or None

    # NOTE(review): sort_by is forwarded as received — confirm that
    # get_article_scorecard restricts it to known columns.
    sort_by = request.args.get("sort", "impressions")
    # Only the two directions offered by the scorecard form are accepted;
    # anything else falls back to the default.
    sort_dir = request.args.get("dir", "desc")
    if sort_dir not in ("asc", "desc"):
        sort_dir = "desc"

    scorecard = await get_article_scorecard(
        date_range_days=days,
        template_slug=template_slug,
        country=country_filter,
        language=language,
        sort_by=sort_by,
        sort_dir=sort_dir,
    )
    return await render_template(
        "admin/partials/seo_scorecard.html",
        scorecard=scorecard,
        date_range_days=days,
        current_template=template_slug,
        current_country=country_filter,
        current_language=language,
        current_sort=sort_by,
        current_dir=sort_dir,
    )
@bp.route("/seo/sync", methods=["POST"])
@role_required("admin")
@csrf_protect
async def seo_sync_now():
    """Queue SEO sync tasks on demand.

    The posted ``source`` field selects what to enqueue: "all" (the
    default) queues every sync task, a single known source queues just
    that one, and anything else only flashes an error. Always redirects
    back to the SEO hub.
    """
    from ..worker import enqueue

    known_sources = ("gsc", "bing", "umami")
    requested = (await request.form).get("source", "all")

    if requested == "all":
        for name in known_sources:
            await enqueue(f"sync_{name}")
        message, category = "All SEO syncs queued.", "success"
    elif requested in known_sources:
        await enqueue(f"sync_{requested}")
        message, category = f"{requested.upper()} sync queued.", "success"
    else:
        message, category = "Unknown source.", "error"

    await flash(message, category)
    return redirect(url_for("admin.seo"))

View File

@@ -86,6 +86,12 @@
Templates
</a>
<div class="admin-sidebar__section">Analytics</div>
<a href="{{ url_for('admin.seo') }}" class="{% if admin_page == 'seo' %}active{% endif %}">
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M2.25 18 9 11.25l4.306 4.306a11.95 11.95 0 0 1 5.814-5.518l2.74-1.22m0 0-5.94-2.281m5.94 2.28-2.28 5.941"/></svg>
SEO Hub
</a>
<div class="admin-sidebar__section">System</div>
<a href="{{ url_for('admin.tasks') }}" class="{% if admin_page == 'tasks' %}active{% endif %}">
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M10.5 6h9.75M10.5 6a1.5 1.5 0 1 1-3 0m3 0a1.5 1.5 0 1 0-3 0M3.75 6H7.5m3 12h9.75m-9.75 0a1.5 1.5 0 0 1-3 0m3 0a1.5 1.5 0 0 0-3 0m-3.75 0H7.5m9-6h3.75m-3.75 0a1.5 1.5 0 0 1-3 0m3 0a1.5 1.5 0 0 0-3 0m-9.75 0h9.75"/></svg>

View File

@@ -0,0 +1,96 @@
<!-- Full Funnel: Search → Analytics → Conversions -->
<style>
.funnel-stage {
display: flex; align-items: center; gap: 1rem; padding: 0.75rem 1rem;
border-bottom: 1px solid #F1F5F9;
}
.funnel-stage:last-child { border-bottom: none; }
.funnel-label { width: 120px; flex-shrink: 0; }
.funnel-label span { font-size: 0.8125rem; font-weight: 600; color: #0F172A; }
.funnel-label small { font-size: 0.6875rem; color: #94A3B8; display: block; }
.funnel-bar-wrap { flex: 1; height: 28px; background: #F1F5F9; border-radius: 4px; overflow: hidden; }
.funnel-bar { height: 100%; border-radius: 4px; min-width: 2px; transition: width 0.3s; }
.funnel-value { width: 100px; text-align: right; flex-shrink: 0; }
.funnel-value span { font-size: 0.9375rem; font-weight: 700; color: #0F172A; }
.funnel-value small { font-size: 0.6875rem; color: #94A3B8; display: block; }
.funnel-section-label {
font-size: 0.5625rem; font-weight: 700; text-transform: uppercase;
letter-spacing: 0.06em; color: #94A3B8; padding: 0.5rem 1rem 0.25rem;
}
</style>
{% set max_val = [funnel.impressions, funnel.clicks, funnel.pageviews, funnel.visitors, funnel.planner_users, funnel.leads] | max or 1 %}
<div class="card">
<div class="funnel-section-label">Search (GSC + Bing)</div>
<div class="funnel-stage">
<div class="funnel-label"><span>Impressions</span><small>Search results shown</small></div>
<div class="funnel-bar-wrap">
<div class="funnel-bar" style="width:{{ (funnel.impressions / max_val * 100) | round(1) }}%;background:#3B82F6"></div>
</div>
<div class="funnel-value">
<span>{{ "{:,}".format(funnel.impressions | int) }}</span>
</div>
</div>
<div class="funnel-stage">
<div class="funnel-label"><span>Clicks</span><small>CTR: {{ "%.1f" | format(funnel.ctr * 100) }}%</small></div>
<div class="funnel-bar-wrap">
<div class="funnel-bar" style="width:{{ (funnel.clicks / max_val * 100) | round(1) }}%;background:#2563EB"></div>
</div>
<div class="funnel-value">
<span>{{ "{:,}".format(funnel.clicks | int) }}</span>
</div>
</div>
<div class="funnel-section-label" style="border-top:1px solid #E2E8F0;margin-top:0.25rem;padding-top:0.75rem">Analytics (Umami)</div>
<div class="funnel-stage">
<div class="funnel-label"><span>Pageviews</span><small>{% if funnel.clicks %}{{ "%.0f" | format(funnel.click_to_view * 100) }}% of clicks{% endif %}</small></div>
<div class="funnel-bar-wrap">
<div class="funnel-bar" style="width:{{ (funnel.pageviews / max_val * 100) | round(1) }}%;background:#8B5CF6"></div>
</div>
<div class="funnel-value">
<span>{{ "{:,}".format(funnel.pageviews | int) }}</span>
</div>
</div>
<div class="funnel-stage">
<div class="funnel-label"><span>Visitors</span><small>Unique</small></div>
<div class="funnel-bar-wrap">
<div class="funnel-bar" style="width:{{ (funnel.visitors / max_val * 100) | round(1) }}%;background:#7C3AED"></div>
</div>
<div class="funnel-value">
<span>{{ "{:,}".format(funnel.visitors | int) }}</span>
</div>
</div>
<div class="funnel-section-label" style="border-top:1px solid #E2E8F0;margin-top:0.25rem;padding-top:0.75rem">Conversions (App)</div>
<div class="funnel-stage">
<div class="funnel-label"><span>Planner Users</span><small>{% if funnel.visitors %}{{ "%.1f" | format(funnel.visitor_to_planner * 100) }}% of visitors{% endif %}</small></div>
<div class="funnel-bar-wrap">
<div class="funnel-bar" style="width:{{ (funnel.planner_users / max_val * 100) | round(1) }}%;background:#10B981"></div>
</div>
<div class="funnel-value">
<span>{{ "{:,}".format(funnel.planner_users | int) }}</span>
</div>
</div>
<div class="funnel-stage">
<div class="funnel-label"><span>Lead Requests</span><small>{% if funnel.planner_users %}{{ "%.1f" | format(funnel.planner_to_lead * 100) }}% of planners{% endif %}</small></div>
<div class="funnel-bar-wrap">
<div class="funnel-bar" style="width:{{ (funnel.leads / max_val * 100) | round(1) }}%;background:#059669"></div>
</div>
<div class="funnel-value">
<span>{{ "{:,}".format(funnel.leads | int) }}</span>
</div>
</div>
</div>
{% if not funnel.impressions and not funnel.pageviews and not funnel.planner_users %}
<div class="card text-center mt-4" style="padding:1.5rem">
<p class="text-slate text-sm">No funnel data yet. Run a sync to populate search and analytics metrics.</p>
</div>
{% endif %}

View File

@@ -0,0 +1,104 @@
<!-- Article Scorecard: per-article performance with attention flags -->
<!-- Filters -->
<div class="card mb-4" style="padding:0.75rem 1rem;">
<form class="flex flex-wrap gap-3 items-end"
hx-get="{{ url_for('admin.seo_scorecard') }}"
hx-target="#tab-content"
hx-trigger="change"
hx-include="this">
<input type="hidden" name="days" value="{{ date_range_days }}">
<div>
<label class="text-xs font-semibold text-slate block mb-1">Template</label>
<select name="template_slug" class="form-input" style="min-width:140px">
<option value="">All</option>
{# One option per distinct template slug; rows without a slug are skipped. #}
{% for slug in scorecard | map(attribute='template_slug') | select | unique %}
<option value="{{ slug }}" {% if slug == current_template %}selected{% endif %}>{{ slug }}</option>
{% endfor %}
</select>
</div>
<div>
<label class="text-xs font-semibold text-slate block mb-1">Language</label>
<select name="language" class="form-input" style="min-width:80px">
<option value="">All</option>
<option value="en" {% if current_language == 'en' %}selected{% endif %}>EN</option>
<option value="de" {% if current_language == 'de' %}selected{% endif %}>DE</option>
</select>
</div>
<div>
<label class="text-xs font-semibold text-slate block mb-1">Sort</label>
<select name="sort" class="form-input" style="min-width:120px">
{% for col, label in [('impressions', 'Impressions'), ('clicks', 'Clicks'), ('ctr', 'CTR'), ('position_avg', 'Position'), ('pageviews', 'Pageviews'), ('published_at', 'Published')] %}
<option value="{{ col }}" {% if col == current_sort %}selected{% endif %}>{{ label }}</option>
{% endfor %}
</select>
</div>
<div>
<label class="text-xs font-semibold text-slate block mb-1">Dir</label>
<select name="dir" class="form-input" style="min-width:80px">
<option value="desc" {% if current_dir == 'desc' %}selected{% endif %}>Desc</option>
<option value="asc" {% if current_dir == 'asc' %}selected{% endif %}>Asc</option>
</select>
</div>
</form>
</div>
{% if scorecard %}
<div class="card" style="overflow-x:auto">
<table class="table">
<thead>
<tr>
<th>Title</th>
<th style="text-align:right">Impressions</th>
<th style="text-align:right">Clicks</th>
<th style="text-align:right">CTR</th>
<th style="text-align:right">Pos</th>
<th style="text-align:right">Views</th>
<th style="text-align:right">Bounce</th>
<th>Published</th>
<th>Flags</th>
</tr>
</thead>
<tbody>
{% for a in scorecard %}
<tr>
<td style="max-width:250px">
<a href="{{ a.url_path }}" target="_blank" class="text-sm" title="{{ a.url_path }}">{{ a.title or a.url_path }}</a>
{% if a.template_slug %}
<br><span class="text-xs text-slate">{{ a.template_slug }}</span>
{% endif %}
</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(a.impressions | int) }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(a.clicks | int) }}</td>
<td style="text-align:right" class="mono text-sm">{{ "%.1f" | format((a.ctr or 0) * 100) }}%</td>
<td style="text-align:right" class="mono text-sm">{{ "%.1f" | format(a.position_avg or 0) }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(a.pageviews | int) }}</td>
<td style="text-align:right" class="mono text-sm">
{% if a.bounce_rate is not none %}{{ "%.0f" | format(a.bounce_rate * 100) }}%{% else %}-{% endif %}
</td>
<td class="mono text-sm">{{ a.published_at[:10] if a.published_at else '-' }}</td>
<td>
{% if a.flag_low_ctr %}
<span class="badge-warning" style="font-size:0.625rem">Low CTR</span>
{% endif %}
{% if a.flag_no_clicks %}
<span class="badge-danger" style="font-size:0.625rem">No Clicks</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<p class="text-xs text-slate mt-2">{{ scorecard | length }} articles shown</p>
{% else %}
<div class="card text-center" style="padding:2rem">
<p class="text-slate text-sm">No published articles match the current filters, or no search/analytics data synced yet.</p>
</div>
{% endif %}

View File

@@ -0,0 +1,132 @@
<!-- Source filter -->
<div class="flex gap-2 mb-4">
<button class="btn-outline btn-sm {% if not current_source %}font-bold{% endif %}"
hx-get="{{ url_for('admin.seo_search') }}?days={{ date_range_days }}"
hx-target="#tab-content">All</button>
<button class="btn-outline btn-sm {% if current_source == 'gsc' %}font-bold{% endif %}"
hx-get="{{ url_for('admin.seo_search') }}?days={{ date_range_days }}&source=gsc"
hx-target="#tab-content">GSC</button>
<button class="btn-outline btn-sm {% if current_source == 'bing' %}font-bold{% endif %}"
hx-get="{{ url_for('admin.seo_search') }}?days={{ date_range_days }}&source=bing"
hx-target="#tab-content">Bing</button>
</div>
<div class="grid-2 mb-6" style="gap:1.5rem">
<!-- Top Queries -->
<section>
<p class="text-xs font-semibold text-slate uppercase tracking-wider mb-2">Top Queries</p>
{% if queries %}
<div class="card">
<table class="table">
<thead>
<tr>
<th>Query</th>
<th style="text-align:right">Impressions</th>
<th style="text-align:right">Clicks</th>
<th style="text-align:right">CTR</th>
<th style="text-align:right">Pos</th>
</tr>
</thead>
<tbody>
{% for q in queries[:20] %}
<tr>
<td class="text-sm">{{ q.query }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(q.impressions | int) }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(q.clicks | int) }}</td>
<td style="text-align:right" class="mono text-sm">{{ "%.1f" | format((q.ctr or 0) * 100) }}%</td>
<td style="text-align:right" class="mono text-sm">{{ "%.1f" | format(q.position_avg or 0) }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="card text-center" style="padding:1.5rem">
<p class="text-slate text-sm">No query data yet. Run a sync to populate.</p>
</div>
{% endif %}
</section>
<!-- Top Pages -->
<section>
<p class="text-xs font-semibold text-slate uppercase tracking-wider mb-2">Top Pages</p>
{% if pages %}
<div class="card">
<table class="table">
<thead>
<tr>
<th>Page</th>
<th style="text-align:right">Impressions</th>
<th style="text-align:right">Clicks</th>
<th style="text-align:right">CTR</th>
<th style="text-align:right">Pos</th>
</tr>
</thead>
<tbody>
{% for p in pages[:20] %}
<tr>
<td class="text-sm" style="max-width:200px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="{{ p.page_url }}">{{ p.page_url }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(p.impressions | int) }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(p.clicks | int) }}</td>
<td style="text-align:right" class="mono text-sm">{{ "%.1f" | format((p.ctr or 0) * 100) }}%</td>
<td style="text-align:right" class="mono text-sm">{{ "%.1f" | format(p.position_avg or 0) }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="card text-center" style="padding:1.5rem">
<p class="text-slate text-sm">No page data yet.</p>
</div>
{% endif %}
</section>
</div>
<div class="grid-2" style="gap:1.5rem">
<!-- Country Breakdown -->
<section>
<p class="text-xs font-semibold text-slate uppercase tracking-wider mb-2">By Country</p>
{% if countries %}
<div class="card">
<table class="table">
<thead><tr><th>Country</th><th style="text-align:right">Impressions</th><th style="text-align:right">Clicks</th></tr></thead>
<tbody>
{% for c in countries[:15] %}
<tr>
<td class="text-sm">{{ c.country | upper }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(c.impressions | int) }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(c.clicks | int) }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="card text-center" style="padding:1rem"><p class="text-slate text-sm">No country data.</p></div>
{% endif %}
</section>
<!-- Device Breakdown -->
<section>
<p class="text-xs font-semibold text-slate uppercase tracking-wider mb-2">By Device (GSC)</p>
{% if devices %}
<div class="card">
<table class="table">
<thead><tr><th>Device</th><th style="text-align:right">Impressions</th><th style="text-align:right">Clicks</th></tr></thead>
<tbody>
{% for d in devices %}
<tr>
<td class="text-sm">{{ d.device | capitalize }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(d.impressions | int) }}</td>
<td style="text-align:right" class="mono text-sm">{{ "{:,}".format(d.clicks | int) }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<div class="card text-center" style="padding:1rem"><p class="text-slate text-sm">No device data (GSC only).</p></div>
{% endif %}
</section>
</div>

View File

@@ -0,0 +1,149 @@
{% extends "admin/base_admin.html" %}
{% set admin_page = "seo" %}
{% block title %}SEO Hub - Admin - {{ config.APP_NAME }}{% endblock %}
{% block admin_head %}
<style>
.seo-tabs { display: flex; gap: 0; border-bottom: 2px solid #E2E8F0; margin-bottom: 1.5rem; }
.seo-tabs button {
padding: 0.625rem 1.25rem; font-size: 0.8125rem; font-weight: 600;
color: #64748B; background: none; border: none; cursor: pointer;
border-bottom: 2px solid transparent; margin-bottom: -2px; transition: all 0.15s;
}
.seo-tabs button:hover { color: #1D4ED8; }
.seo-tabs button.active { color: #1D4ED8; border-bottom-color: #1D4ED8; }
.date-pills { display: flex; gap: 4px; }
.date-pills button {
padding: 4px 12px; font-size: 0.75rem; font-weight: 600;
border-radius: 999px; border: 1px solid #E2E8F0; background: #fff;
color: #64748B; cursor: pointer; transition: all 0.15s;
}
.date-pills button:hover { border-color: #1D4ED8; color: #1D4ED8; }
.date-pills button.active { background: #1D4ED8; color: #fff; border-color: #1D4ED8; }
.sync-row { display: flex; gap: 1rem; align-items: center; flex-wrap: wrap; }
.sync-badge { display: inline-flex; align-items: center; gap: 4px; font-size: 0.6875rem; padding: 2px 8px; border-radius: 4px; }
.sync-ok { background: #F0FDF4; color: #16A34A; }
.sync-fail { background: #FEF2F2; color: #DC2626; }
.sync-none { background: #F8FAFC; color: #94A3B8; }
</style>
{% endblock %}
{% block admin_content %}
<header class="flex justify-between items-center mb-6">
<div>
<h1 class="text-2xl">SEO & Analytics Hub</h1>
<p class="text-sm text-slate mt-1">Search performance, funnel metrics, and article scorecard</p>
</div>
<div class="flex gap-2 items-center">
<form method="post" action="{{ url_for('admin.seo_sync_now') }}" class="m-0">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<input type="hidden" name="source" value="all">
<button type="submit" class="btn btn-sm">Sync Now</button>
</form>
<a href="{{ url_for('admin.index') }}" class="btn-outline btn-sm">Dashboard</a>
</div>
</header>
<!-- Sync Status -->
<div class="sync-row mb-4">
<span class="text-xs font-semibold text-slate uppercase tracking-wider">Last sync:</span>
{% for s in sync_status %}
<span class="sync-badge {% if s.status == 'success' %}sync-ok{% elif s.status == 'failed' %}sync-fail{% else %}sync-none{% endif %}">
{{ s.source | upper }}
{% if s.status == 'success' %}
{{ s.completed_at[:16] if s.completed_at else '' }} ({{ s.rows_synced }} rows)
{% elif s.status == 'failed' %}
failed
{% endif %}
</span>
{% endfor %}
{% if not sync_status %}
<span class="sync-badge sync-none">No syncs yet</span>
{% endif %}
</div>
<!-- Date range selector -->
<div class="flex justify-between items-center mb-6">
<div class="date-pills" id="date-pills">
{% for d, label in [(7, '7d'), (28, '28d'), (90, '3m'), (180, '6m'), (365, '12m')] %}
<button class="{% if date_range_days == d %}active{% endif %}"
onclick="setDateRange({{ d }})">{{ label }}</button>
{% endfor %}
</div>
</div>
<!-- Overview Cards -->
<div style="display:grid;grid-template-columns:repeat(4,1fr);gap:0.75rem" class="mb-8">
<div class="card text-center" style="padding:0.75rem">
<p class="text-xs text-slate">Impressions</p>
<p class="text-xl font-bold text-navy">{{ "{:,}".format(overview.total_impressions | int) }}</p>
</div>
<div class="card text-center" style="padding:0.75rem">
<p class="text-xs text-slate">Clicks</p>
<p class="text-xl font-bold text-navy">{{ "{:,}".format(overview.total_clicks | int) }}</p>
</div>
<div class="card text-center" style="padding:0.75rem">
<p class="text-xs text-slate">Avg CTR</p>
<p class="text-xl font-bold text-navy">{{ "%.1f" | format(overview.avg_ctr * 100) }}%</p>
</div>
<div class="card text-center" style="padding:0.75rem">
<p class="text-xs text-slate">Avg Position</p>
<p class="text-xl font-bold text-navy">{{ "%.1f" | format(overview.avg_position) }}</p>
</div>
</div>
<!-- Tabs -->
<div class="seo-tabs" id="seo-tabs">
<button class="active" data-tab="search"
hx-get="{{ url_for('admin.seo_search') }}?days={{ date_range_days }}"
hx-target="#tab-content" hx-swap="innerHTML"
hx-trigger="click">Search Performance</button>
<button data-tab="funnel"
hx-get="{{ url_for('admin.seo_funnel') }}?days={{ date_range_days }}"
hx-target="#tab-content" hx-swap="innerHTML"
hx-trigger="click">Full Funnel</button>
<button data-tab="scorecard"
hx-get="{{ url_for('admin.seo_scorecard') }}?days={{ date_range_days }}"
hx-target="#tab-content" hx-swap="innerHTML"
hx-trigger="click">Article Scorecard</button>
</div>
<!-- Tab Content -->
<div id="tab-content"
hx-get="{{ url_for('admin.seo_search') }}?days={{ date_range_days }}"
hx-trigger="load"
hx-swap="innerHTML">
<div class="card text-center" style="padding:2rem">
<p class="text-slate">Loading...</p>
</div>
</div>
<script>
// Tab switching
document.getElementById('seo-tabs').addEventListener('click', function(e) {
if (e.target.tagName === 'BUTTON') {
this.querySelectorAll('button').forEach(b => b.classList.remove('active'));
e.target.classList.add('active');
}
});
// Date range switching
var currentDays = {{ date_range_days }};
// Switch the hub to a new date range (in days).
function setDateRange(days) {
    currentDays = days;
    // Highlight the chosen pill right away so the click has visible feedback
    // while the page reloads.
    var labels = {7: '7d', 28: '28d', 90: '3m', 180: '6m', 365: '12m'};
    document.querySelectorAll('#date-pills button').forEach(function(b) {
        b.classList.toggle('active', b.textContent.trim() === labels[days]);
    });
    // The overview cards are rendered server-side, so a full reload is needed.
    // The reloaded page fetches its tab content itself, which is why no
    // separate HTMX request is issued here (it would be aborted by the
    // navigation anyway).
    window.location.href = '{{ url_for("admin.seo") }}?days=' + encodeURIComponent(days);
}
</script>
{% endblock %}

View File

@@ -52,6 +52,12 @@ class Config:
UMAMI_API_TOKEN: str = os.getenv("UMAMI_API_TOKEN", "")
UMAMI_WEBSITE_ID: str = "4474414b-58d6-4c6e-89a1-df5ea1f49d70"
# SEO metrics sync
GSC_SERVICE_ACCOUNT_PATH: str = os.getenv("GSC_SERVICE_ACCOUNT_PATH", "")
GSC_SITE_URL: str = os.getenv("GSC_SITE_URL", "")
BING_WEBMASTER_API_KEY: str = os.getenv("BING_WEBMASTER_API_KEY", "")
BING_SITE_URL: str = os.getenv("BING_SITE_URL", "")
RESEND_API_KEY: str = os.getenv("RESEND_API_KEY", "")
EMAIL_FROM: str = _env("EMAIL_FROM", "hello@padelnomics.io")
LEADS_EMAIL: str = _env("LEADS_EMAIL", "leads@padelnomics.io")

View File

@@ -0,0 +1,84 @@
"""Add SEO metrics tables for GSC, Bing, and Umami data sync.
Three tables:
- seo_search_metrics — daily search data per page+query (GSC + Bing)
- seo_analytics_metrics — daily page analytics (Umami)
- seo_sync_log — tracks sync state per source
"""
def up(conn):
    """Create the SEO metrics tables and their indexes on ``conn``.

    All statements use IF NOT EXISTS and are executed one by one in the
    order listed below.
    """
    statements = (
        # ── 1. Search metrics (GSC + Bing) ─────────────────────────────
        """
        CREATE TABLE IF NOT EXISTS seo_search_metrics (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            source TEXT NOT NULL,
            metric_date TEXT NOT NULL,
            page_url TEXT NOT NULL,
            query TEXT,
            country TEXT,
            device TEXT,
            clicks INTEGER NOT NULL DEFAULT 0,
            impressions INTEGER NOT NULL DEFAULT 0,
            ctr REAL,
            position_avg REAL,
            created_at TEXT NOT NULL DEFAULT (datetime('now'))
        )
        """,
        # SQLite treats NULLs as distinct inside UNIQUE constraints, which
        # would let duplicate rows through; COALESCE maps NULL to '' so the
        # dedup index also covers rows with missing query/country/device.
        """
        CREATE UNIQUE INDEX IF NOT EXISTS idx_seo_search_dedup
        ON seo_search_metrics(
            source, metric_date, page_url,
            COALESCE(query, ''), COALESCE(country, ''), COALESCE(device, '')
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_seo_search_date"
        " ON seo_search_metrics(metric_date)",
        "CREATE INDEX IF NOT EXISTS idx_seo_search_page"
        " ON seo_search_metrics(page_url)",
        # ── 2. Analytics metrics (Umami) ───────────────────────────────
        """
        CREATE TABLE IF NOT EXISTS seo_analytics_metrics (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            metric_date TEXT NOT NULL,
            page_url TEXT NOT NULL,
            pageviews INTEGER NOT NULL DEFAULT 0,
            visitors INTEGER NOT NULL DEFAULT 0,
            bounce_rate REAL,
            time_avg_seconds INTEGER,
            created_at TEXT NOT NULL DEFAULT (datetime('now'))
        )
        """,
        """
        CREATE UNIQUE INDEX IF NOT EXISTS idx_seo_analytics_dedup
        ON seo_analytics_metrics(metric_date, page_url)
        """,
        "CREATE INDEX IF NOT EXISTS idx_seo_analytics_date"
        " ON seo_analytics_metrics(metric_date)",
        # ── 3. Sync log ────────────────────────────────────────────────
        """
        CREATE TABLE IF NOT EXISTS seo_sync_log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            source TEXT NOT NULL,
            status TEXT NOT NULL,
            rows_synced INTEGER NOT NULL DEFAULT 0,
            error TEXT,
            started_at TEXT NOT NULL,
            completed_at TEXT,
            duration_ms INTEGER
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_seo_sync_source"
        " ON seo_sync_log(source, started_at)",
    )
    for statement in statements:
        conn.execute(statement)

View File

@@ -0,0 +1,36 @@
"""
SEO metrics sync and query module.
Syncs data from Google Search Console, Bing Webmaster Tools, and Umami
into SQLite tables. Query functions support the admin SEO hub views.
"""
from ._bing import sync_bing
from ._gsc import sync_gsc
from ._queries import (
cleanup_old_metrics,
get_article_scorecard,
get_country_breakdown,
get_device_breakdown,
get_funnel_metrics,
get_search_performance,
get_sync_status,
get_top_pages,
get_top_queries,
)
from ._umami import sync_umami
__all__ = [
"sync_gsc",
"sync_bing",
"sync_umami",
"get_search_performance",
"get_top_queries",
"get_top_pages",
"get_country_breakdown",
"get_device_breakdown",
"get_funnel_metrics",
"get_article_scorecard",
"get_sync_status",
"cleanup_old_metrics",
]

View File

@@ -0,0 +1,143 @@
"""Bing Webmaster Tools sync via REST API.
Uses an API key for auth. Fetches query stats and page stats.
"""
from datetime import datetime, timedelta
from urllib.parse import urlparse
import httpx
from ..core import config, execute
_TIMEOUT_SECONDS = 30
def _normalize_url(full_url: str) -> str:
    """Return only the path component of *full_url* ("/" when it has none)."""
    path = urlparse(full_url).path
    return path if path else "/"
def _parse_bing_date(raw):
    """Convert a Bing ``/Date(<ms>)/`` value to a naive UTC datetime, or None."""
    import re

    if not isinstance(raw, str):
        return None
    # Only the leading epoch-milliseconds value is read. The value may carry
    # a trailing "+hhmm"/"-hhmm" offset (e.g. "/Date(1399100400000-0700)/"),
    # which must not reach int().
    match = re.search(r"/Date\((-?\d+)", raw)
    if match is None:
        return None
    return datetime.utcfromtimestamp(int(match.group(1)) / 1000)


async def _fetch_bing_entries(client, endpoint):
    """GET one Bing Webmaster JSON endpoint and return its list of entries."""
    response = await client.get(
        f"https://ssl.bing.com/webmaster/api.svc/json/{endpoint}",
        params={
            "apikey": config.BING_WEBMASTER_API_KEY,
            "siteUrl": config.BING_SITE_URL,
        },
    )
    response.raise_for_status()
    data = response.json()
    # Bing returns {"d": [{"Query": ..., "Date": ..., ...}, ...]}
    entries = data.get("d", []) if isinstance(data, dict) else data
    return entries if isinstance(entries, list) else []


async def sync_bing(days_back: int = 3, timeout_seconds: int = _TIMEOUT_SECONDS) -> int:
    """Sync Bing Webmaster query and page stats into seo_search_metrics.

    Args:
        days_back: Only entries dated within this many days are stored (1-90).
        timeout_seconds: HTTP timeout passed to the httpx client (1-120).

    Returns:
        Number of rows written; 0 when Bing is not configured.

    Raises:
        Exception: Any error during fetch or insert is recorded as a
            'failed' row in seo_sync_log and then re-raised.
    """
    assert 1 <= days_back <= 90, "days_back must be 1-90"
    assert 1 <= timeout_seconds <= 120, "timeout_seconds must be 1-120"
    if not config.BING_WEBMASTER_API_KEY or not config.BING_SITE_URL:
        return 0  # Bing not configured — skip silently

    started_at = datetime.utcnow()
    try:
        rows_synced = 0
        cutoff = datetime.utcnow() - timedelta(days=days_back)
        async with httpx.AsyncClient(timeout=timeout_seconds) as client:
            # Query-level stats carry no page, so they are stored under '/'.
            for entry in await _fetch_bing_entries(client, "GetQueryStats"):
                entry_date = _parse_bing_date(entry.get("Date"))
                if entry_date is None or entry_date < cutoff:
                    continue
                await execute(
                    """INSERT OR REPLACE INTO seo_search_metrics
                    (source, metric_date, page_url, query, country, device,
                    clicks, impressions, ctr, position_avg)
                    VALUES ('bing', ?, '/', ?, NULL, NULL, ?, ?, ?, ?)""",
                    (
                        entry_date.strftime("%Y-%m-%d"),
                        entry.get("Query", ""),
                        entry.get("Clicks", 0),
                        entry.get("Impressions", 0),
                        entry.get("AvgCTR", 0.0),
                        entry.get("AvgClickPosition", 0.0),
                    ),
                )
                rows_synced += 1

            # Page-level stats: stored with an empty query and no CTR/position.
            for entry in await _fetch_bing_entries(client, "GetPageStats"):
                entry_date = _parse_bing_date(entry.get("Date"))
                if entry_date is None or entry_date < cutoff:
                    continue
                await execute(
                    """INSERT OR REPLACE INTO seo_search_metrics
                    (source, metric_date, page_url, query, country, device,
                    clicks, impressions, ctr, position_avg)
                    VALUES ('bing', ?, ?, '', NULL, NULL, ?, ?, NULL, NULL)""",
                    (
                        entry_date.strftime("%Y-%m-%d"),
                        _normalize_url(entry.get("Url", "/")),
                        entry.get("Clicks", 0),
                        entry.get("Impressions", 0),
                    ),
                )
                rows_synced += 1

        duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
        await execute(
            """INSERT INTO seo_sync_log
            (source, status, rows_synced, started_at, completed_at, duration_ms)
            VALUES ('bing', 'success', ?, ?, ?, ?)""",
            (rows_synced, started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
        )
        return rows_synced
    except Exception as exc:
        duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
        # The API key travels as a query parameter, and HTTP error messages
        # can embed the full request URL; keep the key out of the sync log.
        # NOTE(review): assumes the key appears un-escaped in the URL — confirm.
        error_text = str(exc).replace(config.BING_WEBMASTER_API_KEY, "***")
        await execute(
            """INSERT INTO seo_sync_log
            (source, status, rows_synced, error, started_at, completed_at, duration_ms)
            VALUES ('bing', 'failed', 0, ?, ?, ?, ?)""",
            (error_text, started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
        )
        raise

View File

@@ -0,0 +1,144 @@
"""Google Search Console sync via Search Analytics API.
Uses a service account JSON key file for auth. The google-api-python-client
is synchronous, so sync runs in asyncio.to_thread().
"""
import asyncio
import time
from datetime import datetime, timedelta
from pathlib import Path
from urllib.parse import urlparse
from ..core import config, execute
# Upper bound on rows GSC hands back for a single request (25K).
_ROWS_PER_PAGE = 25_000


def _fetch_gsc_data(
    start_date: str,
    end_date: str,
    max_pages: int,
) -> list[dict]:
    """Blocking Search Console download, meant to run via asyncio.to_thread().

    Issues at most ``max_pages`` Search Analytics queries, advancing the
    ``startRow`` offset by ``_ROWS_PER_PAGE`` each time, and stops early on
    an empty or short page.

    Returns a list of flat dicts with keys: date, page, query, country,
    device, clicks, impressions, ctr, position.
    """
    from google.oauth2.service_account import Credentials
    from googleapiclient.discovery import build

    key_path = Path(config.GSC_SERVICE_ACCOUNT_PATH)
    assert key_path.exists(), f"GSC service account key not found: {key_path}"

    service = build(
        "searchconsole",
        "v1",
        credentials=Credentials.from_service_account_file(
            str(key_path),
            scopes=["https://www.googleapis.com/auth/webmasters.readonly"],
        ),
    )

    def _flatten(api_row: dict) -> dict:
        # "keys" follows the order of the requested dimensions below.
        dims = api_row["keys"]
        return {
            "date": dims[0],
            "page": dims[1],
            "query": dims[2],
            "country": dims[3],
            "device": dims[4],
            "clicks": api_row.get("clicks", 0),
            "impressions": api_row.get("impressions", 0),
            "ctr": api_row.get("ctr", 0.0),
            "position": api_row.get("position", 0.0),
        }

    collected: list[dict] = []
    for page_index in range(max_pages):
        response = service.searchanalytics().query(
            siteUrl=config.GSC_SITE_URL,
            body={
                "startDate": start_date,
                "endDate": end_date,
                "dimensions": ["date", "page", "query", "country", "device"],
                "rowLimit": _ROWS_PER_PAGE,
                "startRow": page_index * _ROWS_PER_PAGE,
            },
        ).execute()
        batch = response.get("rows", [])
        if not batch:
            break
        collected.extend(_flatten(api_row) for api_row in batch)
        # A short page means there is nothing left to fetch.
        if len(batch) < _ROWS_PER_PAGE:
            break
    return collected
def _normalize_url(full_url: str) -> str:
"""Strip a full URL to just the path (no domain).
Example: 'https://padelnomics.io/en/markets/germany/berlin''/en/markets/germany/berlin'
"""
parsed = urlparse(full_url)
return parsed.path or "/"
async def sync_gsc(days_back: int = 3, max_pages: int = 10) -> int:
    """Pull GSC search analytics into seo_search_metrics and log the run.

    Returns the number of rows written, or 0 without doing anything when
    GSC is not configured. Any failure is recorded in seo_sync_log with
    status 'failed' and then re-raised.
    """
    assert 1 <= days_back <= 90, "days_back must be 1-90"
    assert 1 <= max_pages <= 20, "max_pages must be 1-20"
    if not (config.GSC_SERVICE_ACCOUNT_PATH and config.GSC_SITE_URL):
        return 0  # GSC not configured — skip silently

    started_at = datetime.utcnow()
    # GSC data lags by roughly two days, so the window ends two days ago.
    window_end = datetime.utcnow() - timedelta(days=2)
    window_start = datetime.utcnow() - timedelta(days=days_back + 2)
    end_date = f"{window_end:%Y-%m-%d}"
    start_date = f"{window_start:%Y-%m-%d}"

    try:
        # The Google client is synchronous; keep it off the event loop.
        fetched = await asyncio.to_thread(
            _fetch_gsc_data, start_date, end_date, max_pages,
        )
        for record in fetched:
            await execute(
                """INSERT OR REPLACE INTO seo_search_metrics
(source, metric_date, page_url, query, country, device,
clicks, impressions, ctr, position_avg)
VALUES ('gsc', ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    record["date"],
                    _normalize_url(record["page"]),
                    record["query"],
                    record["country"],
                    record["device"],
                    record["clicks"],
                    record["impressions"],
                    record["ctr"],
                    record["position"],
                ),
            )
        rows_synced = len(fetched)
        elapsed = datetime.utcnow() - started_at
        duration_ms = int(elapsed.total_seconds() * 1000)
        await execute(
            """INSERT INTO seo_sync_log
(source, status, rows_synced, started_at, completed_at, duration_ms)
VALUES ('gsc', 'success', ?, ?, ?, ?)""",
            (rows_synced, started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
        )
        return rows_synced
    except Exception as exc:
        elapsed = datetime.utcnow() - started_at
        duration_ms = int(elapsed.total_seconds() * 1000)
        await execute(
            """INSERT INTO seo_sync_log
(source, status, rows_synced, error, started_at, completed_at, duration_ms)
VALUES ('gsc', 'failed', 0, ?, ?, ?, ?)""",
            (str(exc), started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
        )
        raise

View File

@@ -0,0 +1,379 @@
"""SQL query functions for the admin SEO hub views.
All heavy lifting happens in SQL. Functions accept filter parameters
and return plain dicts/lists.
"""
from datetime import datetime, timedelta
from ..core import execute, fetch_all, fetch_one
def _date_cutoff(date_range_days: int) -> str:
"""Return ISO date string for N days ago."""
return (datetime.utcnow() - timedelta(days=date_range_days)).strftime("%Y-%m-%d")
async def get_search_performance(
    date_range_days: int = 28,
    source: str | None = None,
) -> dict:
    """Aggregate search performance: total clicks, impressions, avg CTR, avg position.

    Args:
        date_range_days: Look-back window in days (1-730).
        source: Optional value matched against the ``source`` column;
            falsy means all sources.

    Returns:
        Dict with keys total_clicks, total_impressions, avg_ctr, avg_position.
        avg_position is impression-weighted over rows that actually carry a
        position and is 0 when no such row exists.
    """
    assert 1 <= date_range_days <= 730
    cutoff = _date_cutoff(date_range_days)
    source_filter = "AND source = ?" if source else ""
    params: list = [cutoff]
    if source:
        params.append(source)
    # Rows may have position_avg NULL (the Bing page-level sync stores NULL).
    # SUM() skips NULL products, so the weight must skip those rows as well;
    # otherwise their impressions dilute the average.
    # NOTE(review): Bing traffic appears to be stored twice (query-level and
    # page-level rows), so the click/impression totals may double-count it —
    # confirm against the sync and schema.
    row = await fetch_one(
        f"""SELECT
            COALESCE(SUM(clicks), 0) AS total_clicks,
            COALESCE(SUM(impressions), 0) AS total_impressions,
            CASE WHEN SUM(impressions) > 0
                THEN CAST(SUM(clicks) AS REAL) / SUM(impressions)
                ELSE 0 END AS avg_ctr,
            CASE WHEN SUM(CASE WHEN position_avg IS NOT NULL
                               THEN impressions ELSE 0 END) > 0
                THEN SUM(position_avg * impressions)
                     / SUM(CASE WHEN position_avg IS NOT NULL
                                THEN impressions ELSE 0 END)
                ELSE 0 END AS avg_position
        FROM seo_search_metrics
        WHERE metric_date >= ? {source_filter}""",
        tuple(params),
    )
    return dict(row) if row else {
        "total_clicks": 0, "total_impressions": 0,
        "avg_ctr": 0, "avg_position": 0,
    }
async def get_top_queries(
    date_range_days: int = 28,
    source: str | None = None,
    limit: int = 50,
) -> list[dict]:
    """Rank search queries by impressions within the look-back window.

    Each result dict carries query, clicks, impressions, ctr and the
    impression-weighted position_avg. Rows without a query are ignored.
    """
    assert 1 <= date_range_days <= 730
    assert 1 <= limit <= 500
    cutoff = _date_cutoff(date_range_days)
    # Bind order must follow placeholder order: cutoff, [source], limit.
    if source:
        source_filter = "AND source = ?"
        bind_values = (cutoff, source, limit)
    else:
        source_filter = ""
        bind_values = (cutoff, limit)
    sql = f"""SELECT
query,
SUM(clicks) AS clicks,
SUM(impressions) AS impressions,
CASE WHEN SUM(impressions) > 0
THEN CAST(SUM(clicks) AS REAL) / SUM(impressions)
ELSE 0 END AS ctr,
CASE WHEN SUM(impressions) > 0
THEN SUM(position_avg * impressions) / SUM(impressions)
ELSE 0 END AS position_avg
FROM seo_search_metrics
WHERE metric_date >= ?
AND query IS NOT NULL AND query != ''
{source_filter}
GROUP BY query
ORDER BY impressions DESC
LIMIT ?"""
    result = await fetch_all(sql, bind_values)
    return list(map(dict, result))
async def get_top_pages(
    date_range_days: int = 28,
    source: str | None = None,
    limit: int = 50,
) -> list[dict]:
    """Top pages by impressions with clicks, CTR, avg position.

    Args:
        date_range_days: Look-back window in days (1-730).
        source: Optional value matched against the ``source`` column;
            falsy means all sources.
        limit: Maximum number of pages returned (1-500).

    Returns:
        List of dicts with keys page_url, clicks, impressions, ctr,
        position_avg. position_avg is impression-weighted over rows that
        carry a position and is 0 when a page has none.
    """
    assert 1 <= date_range_days <= 730
    assert 1 <= limit <= 500
    cutoff = _date_cutoff(date_range_days)
    source_filter = "AND source = ?" if source else ""
    params: list = [cutoff]
    if source:
        params.append(source)
    params.append(limit)
    # Page-level Bing rows are stored with position_avg NULL. Weight the
    # average only by impressions of rows that have a position, so those
    # rows do not drag the result toward 0.
    # NOTE(review): query-level Bing rows are stored under page_url '/', which
    # presumably inflates the '/' entry here — confirm whether that is intended.
    rows = await fetch_all(
        f"""SELECT
            page_url,
            SUM(clicks) AS clicks,
            SUM(impressions) AS impressions,
            CASE WHEN SUM(impressions) > 0
                THEN CAST(SUM(clicks) AS REAL) / SUM(impressions)
                ELSE 0 END AS ctr,
            CASE WHEN SUM(CASE WHEN position_avg IS NOT NULL
                               THEN impressions ELSE 0 END) > 0
                THEN SUM(position_avg * impressions)
                     / SUM(CASE WHEN position_avg IS NOT NULL
                                THEN impressions ELSE 0 END)
                ELSE 0 END AS position_avg
        FROM seo_search_metrics
        WHERE metric_date >= ?
            {source_filter}
        GROUP BY page_url
        ORDER BY impressions DESC
        LIMIT ?""",
        tuple(params),
    )
    return [dict(r) for r in rows]
async def get_country_breakdown(date_range_days: int = 28) -> list[dict]:
    """Sum clicks and impressions per country, highest impressions first.

    Rows without a country are excluded; at most 50 countries are returned.
    """
    assert 1 <= date_range_days <= 730
    since = _date_cutoff(date_range_days)
    sql = """SELECT
country,
SUM(clicks) AS clicks,
SUM(impressions) AS impressions
FROM seo_search_metrics
WHERE metric_date >= ?
AND country IS NOT NULL AND country != ''
GROUP BY country
ORDER BY impressions DESC
LIMIT 50"""
    result = await fetch_all(sql, (since,))
    return list(map(dict, result))
async def get_device_breakdown(date_range_days: int = 28) -> list[dict]:
    """Sum clicks and impressions per device type, GSC rows only.

    Rows without a device are excluded; results are ordered by
    impressions, highest first.
    """
    assert 1 <= date_range_days <= 730
    since = _date_cutoff(date_range_days)
    sql = """SELECT
device,
SUM(clicks) AS clicks,
SUM(impressions) AS impressions
FROM seo_search_metrics
WHERE metric_date >= ?
AND source = 'gsc'
AND device IS NOT NULL AND device != ''
GROUP BY device
ORDER BY impressions DESC"""
    result = await fetch_all(sql, (since,))
    return list(map(dict, result))
async def get_funnel_metrics(
    date_range_days: int = 28,
) -> dict:
    """Full funnel: search → analytics → conversions.

    Combines search metrics (GSC/Bing), analytics (Umami), and
    business metrics (planner users, leads) from SQLite.

    Args:
        date_range_days: Look-back window in days (1-730).

    Returns:
        Dict with the stage counts (impressions, clicks, pageviews, visitors,
        planner_users, leads) and the stage-to-stage ratios (ctr,
        click_to_view, view_to_visitor, visitor_to_planner, planner_to_lead).
        A ratio is 0 when its denominator is 0.
    """
    assert 1 <= date_range_days <= 730
    cutoff = _date_cutoff(date_range_days)

    # Search layer
    search = await fetch_one(
        """SELECT
            COALESCE(SUM(impressions), 0) AS impressions,
            COALESCE(SUM(clicks), 0) AS clicks
        FROM seo_search_metrics
        WHERE metric_date >= ?""",
        (cutoff,),
    )

    # Analytics layer.
    # The Umami sync writes per-URL rows with visitors = 0 and keeps the
    # site-wide visitor count on the '/' row of each day. Pageviews therefore
    # come from the per-URL rows (skipping '/' avoids adding the site-wide
    # total on top), while visitors must come from the '/' rows. Filtering
    # '/' out for both made visitors always 0.
    analytics = await fetch_one(
        """SELECT
            COALESCE(SUM(CASE WHEN page_url != '/' THEN pageviews END), 0) AS pageviews,
            COALESCE(SUM(CASE WHEN page_url = '/' THEN visitors END), 0) AS visitors
        FROM seo_analytics_metrics
        WHERE metric_date >= ?""",
        (cutoff,),
    )

    # Business layer (from existing SQLite tables)
    planner_users = await fetch_one(
        """SELECT COUNT(DISTINCT user_id) AS cnt
        FROM scenarios
        WHERE deleted_at IS NULL
            AND created_at >= ?""",
        (cutoff,),
    )
    leads = await fetch_one(
        """SELECT COUNT(*) AS cnt
        FROM lead_requests
        WHERE lead_type = 'quote'
            AND created_at >= ?""",
        (cutoff,),
    )

    imp = search["impressions"] if search else 0
    clicks = search["clicks"] if search else 0
    pvs = analytics["pageviews"] if analytics else 0
    vis = analytics["visitors"] if analytics else 0
    planners = planner_users["cnt"] if planner_users else 0
    lead_count = leads["cnt"] if leads else 0

    return {
        "impressions": imp,
        "clicks": clicks,
        "pageviews": pvs,
        "visitors": vis,
        "planner_users": planners,
        "leads": lead_count,
        # Conversion rates between stages
        "ctr": clicks / imp if imp > 0 else 0,
        "click_to_view": pvs / clicks if clicks > 0 else 0,
        "view_to_visitor": vis / pvs if pvs > 0 else 0,
        "visitor_to_planner": planners / vis if vis > 0 else 0,
        "planner_to_lead": lead_count / planners if planners > 0 else 0,
    }
async def get_article_scorecard(
    date_range_days: int = 28,
    template_slug: str | None = None,
    country: str | None = None,
    language: str | None = None,
    sort_by: str = "impressions",
    sort_dir: str = "desc",
    limit: int = 100,
) -> list[dict]:
    """Per-article scorecard joining articles + search + analytics metrics.

    Returns article metadata enriched with search and analytics data,
    plus attention flags for articles needing action.

    Args:
        date_range_days: Look-back window for both metric tables (1-730).
        template_slug: Optional exact match on ``articles.template_slug``.
        country: Optional exact match on ``articles.country``.
        language: Optional exact match on ``articles.language``.
        sort_by: Result column to order by; unknown values fall back to
            "impressions".
        sort_dir: "asc" or "desc".
        limit: Maximum number of articles returned (1-500).

    Returns:
        One dict per published article with its metadata, summed search and
        analytics metrics, and the 0/1 flags ``flag_low_ctr`` (more than 100
        impressions with CTR under 2%) and ``flag_no_clicks`` (no clicks and
        published at least 30 days ago).
    """
    assert 1 <= date_range_days <= 730
    assert 1 <= limit <= 500
    assert sort_dir in ("asc", "desc")
    # sort_dir is interpolated into the SQL text below. Asserts are stripped
    # under `python -O`, so keep a real guard as well.
    if sort_dir not in ("asc", "desc"):
        sort_dir = "desc"

    # Allowlist sort columns to prevent SQL injection
    sort_columns = {
        "impressions", "clicks", "ctr", "position_avg",
        "pageviews", "title", "published_at",
    }
    if sort_by not in sort_columns:
        sort_by = "impressions"

    cutoff = _date_cutoff(date_range_days)
    wheres = ["a.status = 'published'"]
    # The first two placeholders are the cutoffs of the two metric subqueries.
    params: list = [cutoff, cutoff]
    if template_slug:
        wheres.append("a.template_slug = ?")
        params.append(template_slug)
    if country:
        wheres.append("a.country = ?")
        params.append(country)
    if language:
        wheres.append("a.language = ?")
        params.append(language)
    where_clause = " AND ".join(wheres)
    params.append(limit)

    # In the search subquery, position_avg is weighted only by impressions of
    # rows that carry a position. Page-level Bing rows store NULL there and
    # would otherwise dilute the average.
    rows = await fetch_all(
        f"""SELECT
            a.id,
            a.title,
            a.url_path,
            a.template_slug,
            a.country,
            a.language,
            a.published_at,
            COALESCE(s.impressions, 0) AS impressions,
            COALESCE(s.clicks, 0) AS clicks,
            COALESCE(s.ctr, 0) AS ctr,
            COALESCE(s.position_avg, 0) AS position_avg,
            COALESCE(u.pageviews, 0) AS pageviews,
            COALESCE(u.visitors, 0) AS visitors,
            u.bounce_rate,
            u.time_avg_seconds,
            -- Attention flags
            CASE WHEN COALESCE(s.impressions, 0) > 100
                    AND COALESCE(s.ctr, 0) < 0.02
                THEN 1 ELSE 0 END AS flag_low_ctr,
            CASE WHEN COALESCE(s.clicks, 0) = 0
                    AND a.published_at <= date('now', '-30 days')
                THEN 1 ELSE 0 END AS flag_no_clicks
        FROM articles a
        LEFT JOIN (
            SELECT page_url,
                SUM(impressions) AS impressions,
                SUM(clicks) AS clicks,
                CASE WHEN SUM(impressions) > 0
                    THEN CAST(SUM(clicks) AS REAL) / SUM(impressions)
                    ELSE 0 END AS ctr,
                CASE WHEN SUM(CASE WHEN position_avg IS NOT NULL
                                   THEN impressions ELSE 0 END) > 0
                    THEN SUM(position_avg * impressions)
                         / SUM(CASE WHEN position_avg IS NOT NULL
                                    THEN impressions ELSE 0 END)
                    ELSE 0 END AS position_avg
            FROM seo_search_metrics
            WHERE metric_date >= ?
            GROUP BY page_url
        ) s ON s.page_url = a.url_path
        LEFT JOIN (
            SELECT page_url,
                SUM(pageviews) AS pageviews,
                SUM(visitors) AS visitors,
                AVG(bounce_rate) AS bounce_rate,
                AVG(time_avg_seconds) AS time_avg_seconds
            FROM seo_analytics_metrics
            WHERE metric_date >= ?
            GROUP BY page_url
        ) u ON u.page_url = a.url_path
        WHERE {where_clause}
        ORDER BY {sort_by} {sort_dir}
        LIMIT ?""",
        tuple(params),
    )
    return [dict(r) for r in rows]
async def get_sync_status() -> list[dict]:
    """Return the most recent seo_sync_log entry per source, ordered by source.

    "Most recent" means the row with the highest id for that source.
    """
    sql = """SELECT source, status, rows_synced, error,
started_at, completed_at, duration_ms
FROM seo_sync_log
WHERE id IN (
SELECT MAX(id) FROM seo_sync_log GROUP BY source
)
ORDER BY source"""
    latest = await fetch_all(sql)
    return list(map(dict, latest))
async def cleanup_old_metrics(retention_days: int = 365) -> int:
    """Purge search/analytics metrics older than ``retention_days``.

    Sync-log rows are purged on a fixed 30-day horizon. Returns the sum of
    whatever the three DELETE calls report, treating falsy results as 0.
    """
    assert 30 <= retention_days <= 1095
    metrics_cutoff = _date_cutoff(retention_days)
    removed_counts = [
        await execute(
            "DELETE FROM seo_search_metrics WHERE metric_date < ?", (metrics_cutoff,)
        ),
        await execute(
            "DELETE FROM seo_analytics_metrics WHERE metric_date < ?", (metrics_cutoff,)
        ),
    ]
    # Sync log: keep 30 days
    log_cutoff = _date_cutoff(30)
    removed_counts.append(
        await execute(
            "DELETE FROM seo_sync_log WHERE started_at < ?", (log_cutoff,)
        )
    )
    return sum(count or 0 for count in removed_counts)

View File

@@ -0,0 +1,117 @@
"""Umami analytics sync via REST API.
Uses bearer token auth. Self-hosted instance, no rate limits.
Config already exists: UMAMI_API_URL, UMAMI_API_TOKEN, UMAMI_WEBSITE_ID.
"""
from datetime import datetime, timedelta
import httpx
from ..core import config, execute
_TIMEOUT_SECONDS = 15
async def sync_umami(days_back: int = 3, timeout_seconds: int = _TIMEOUT_SECONDS) -> int:
"""Sync Umami per-URL metrics into seo_analytics_metrics. Returns rows synced."""
assert 1 <= days_back <= 90, "days_back must be 1-90"
assert 1 <= timeout_seconds <= 120, "timeout_seconds must be 1-120"
if not config.UMAMI_API_TOKEN or not config.UMAMI_API_URL:
return 0 # Umami not configured — skip silently
started_at = datetime.utcnow()
try:
rows_synced = 0
headers = {"Authorization": f"Bearer {config.UMAMI_API_TOKEN}"}
base = config.UMAMI_API_URL.rstrip("/")
website_id = config.UMAMI_WEBSITE_ID
async with httpx.AsyncClient(timeout=timeout_seconds, headers=headers) as client:
# Fetch per-URL metrics for each day individually
# (Umami's metrics endpoint returns totals for the period,
# so we query one day at a time for daily granularity)
for day_offset in range(days_back):
day = datetime.utcnow() - timedelta(days=day_offset + 1)
metric_date = day.strftime("%Y-%m-%d")
start_ms = int(day.replace(hour=0, minute=0, second=0).timestamp() * 1000)
end_ms = int(day.replace(hour=23, minute=59, second=59).timestamp() * 1000)
# Get URL-level metrics
response = await client.get(
f"{base}/api/websites/{website_id}/metrics",
params={
"startAt": start_ms,
"endAt": end_ms,
"type": "url",
"limit": 500,
},
)
response.raise_for_status()
url_metrics = response.json()
if not isinstance(url_metrics, list):
continue
for entry in url_metrics:
page_url = entry.get("x", "")
pageviews = entry.get("y", 0)
if not page_url:
continue
await execute(
"""INSERT OR REPLACE INTO seo_analytics_metrics
(metric_date, page_url, pageviews, visitors,
bounce_rate, time_avg_seconds)
VALUES (?, ?, ?, 0, NULL, NULL)""",
(metric_date, page_url, pageviews),
)
rows_synced += 1
# Try to get overall stats for bounce rate and visit duration
# (Umami doesn't provide per-URL bounce rate, only site-wide)
stats_response = await client.get(
f"{base}/api/websites/{website_id}/stats",
params={"startAt": start_ms, "endAt": end_ms},
)
if stats_response.status_code == 200:
stats = stats_response.json()
visitors = stats.get("visitors", {}).get("value", 0)
bounce_rate = stats.get("bounces", {}).get("value", 0)
total_time = stats.get("totaltime", {}).get("value", 0)
page_count = stats.get("pageviews", {}).get("value", 1) or 1
# Store site-wide stats on the root URL for the day
avg_time = int(total_time / max(visitors, 1))
br = bounce_rate / max(visitors, 1) if visitors else 0
await execute(
"""INSERT OR REPLACE INTO seo_analytics_metrics
(metric_date, page_url, pageviews, visitors,
bounce_rate, time_avg_seconds)
VALUES (?, '/', ?, ?, ?, ?)""",
(metric_date, page_count, visitors, br, avg_time),
)
duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
await execute(
"""INSERT INTO seo_sync_log
(source, status, rows_synced, started_at, completed_at, duration_ms)
VALUES ('umami', 'success', ?, ?, ?, ?)""",
(rows_synced, started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
)
return rows_synced
except Exception as exc:
duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
await execute(
"""INSERT INTO seo_sync_log
(source, status, rows_synced, error, started_at, completed_at, duration_ms)
VALUES ('umami', 'failed', 0, ?, ?, ?, ?)""",
(str(exc), started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
)
raise

View File

@@ -564,6 +564,45 @@ async def handle_cleanup_tasks(payload: dict) -> None:
)
# =============================================================================
# SEO Metrics Sync
# =============================================================================
@task("sync_gsc")
async def handle_sync_gsc(payload: dict) -> None:
    """Worker task: run the Google Search Console sync and print the row count.

    ``payload`` may carry ``days_back``; it defaults to 3.
    """
    from .seo import sync_gsc

    rows = await sync_gsc(days_back=payload.get("days_back", 3))
    print(f"[WORKER] GSC sync complete: {rows} rows")
@task("sync_bing")
async def handle_sync_bing(payload: dict) -> None:
    """Worker task: run the Bing Webmaster sync and print the row count.

    ``payload`` may carry ``days_back``; it defaults to 3.
    """
    from .seo import sync_bing

    rows = await sync_bing(days_back=payload.get("days_back", 3))
    print(f"[WORKER] Bing sync complete: {rows} rows")
@task("sync_umami")
async def handle_sync_umami(payload: dict) -> None:
    """Worker task: run the Umami analytics sync and print the row count.

    ``payload`` may carry ``days_back``; it defaults to 3.
    """
    from .seo import sync_umami

    rows = await sync_umami(days_back=payload.get("days_back", 3))
    print(f"[WORKER] Umami sync complete: {rows} rows")
@task("cleanup_seo_metrics")
async def handle_cleanup_seo_metrics(payload: dict) -> None:
    """Worker task: purge SEO metrics past the 12-month retention window.

    ``payload`` is accepted for the task interface but not read.
    """
    from .seo import cleanup_old_metrics

    retention_days = 365  # 12 months
    deleted = await cleanup_old_metrics(retention_days=retention_days)
    print(f"[WORKER] Cleaned up {deleted} old SEO metric rows")
# =============================================================================
# Worker Loop
# =============================================================================
@@ -616,6 +655,7 @@ async def run_scheduler() -> None:
await init_db()
last_credit_refill = None
last_seo_sync_date = None
while True:
try:
@@ -633,6 +673,17 @@ async def run_scheduler() -> None:
last_credit_refill = this_month
print(f"[SCHEDULER] Queued monthly credit refill for {this_month}")
# Daily SEO metrics sync — run once per day after 6am UTC
# (GSC data has ~2 day delay, syncing at 6am ensures data is ready)
today_date = today.strftime("%Y-%m-%d")
if last_seo_sync_date != today_date and today.hour >= 6:
await enqueue("sync_gsc")
await enqueue("sync_bing")
await enqueue("sync_umami")
await enqueue("cleanup_seo_metrics")
last_seo_sync_date = today_date
print(f"[SCHEDULER] Queued SEO metric syncs for {today_date}")
await asyncio.sleep(3600) # 1 hour
except Exception as e: