diff --git a/web/pyproject.toml b/web/pyproject.toml
index 59b0690..d03b01e 100644
--- a/web/pyproject.toml
+++ b/web/pyproject.toml
@@ -17,6 +17,9 @@ dependencies = [
"weasyprint>=68.1",
"duckdb>=1.0.0",
"pyyaml>=6.0",
+ "httpx>=0.27.0",
+ "google-api-python-client>=2.100.0",
+ "google-auth>=2.23.0",
]
[build-system]
diff --git a/web/src/padelnomics/admin/routes.py b/web/src/padelnomics/admin/routes.py
index 8bcf7c8..d4a800c 100644
--- a/web/src/padelnomics/admin/routes.py
+++ b/web/src/padelnomics/admin/routes.py
@@ -1401,3 +1401,134 @@ async def _rebuild_article(article_id: int):
body_html = await bake_scenario_cards(body_html, lang=lang)
BUILD_DIR.mkdir(parents=True, exist_ok=True)
(BUILD_DIR / f"{article['slug']}.html").write_text(body_html)
+
+
+# =============================================================================
+# SEO Hub
+# =============================================================================
+
+@bp.route("/seo")
+@role_required("admin")
+async def seo():
+    """SEO metrics hub — overview + tabs for search, funnel, scorecard.
+
+    Query params:
+        days: lookback window in days (default 28, clamped to 1-730).
+    """
+    from ..seo import get_search_performance, get_sync_status
+
+    # A malformed ?days= value (e.g. "abc") must fall back to the default
+    # instead of letting int() raise and return a 500.
+    try:
+        date_range_days = int(request.args.get("days", "28") or "28")
+    except ValueError:
+        date_range_days = 28
+    date_range_days = max(1, min(date_range_days, 730))
+
+    overview = await get_search_performance(date_range_days=date_range_days)
+    sync_status = await get_sync_status()
+
+    return await render_template(
+        "admin/seo.html",
+        overview=overview,
+        sync_status=sync_status,
+        date_range_days=date_range_days,
+    )
+
+
+@bp.route("/seo/search")
+@role_required("admin")
+async def seo_search():
+    """HTMX partial: search performance tab.
+
+    Query params:
+        days: lookback window (default 28, clamped to 1-730).
+        source: optional source filter, passed through to the query layer.
+    """
+    from ..seo import (
+        get_country_breakdown,
+        get_device_breakdown,
+        get_top_pages,
+        get_top_queries,
+    )
+
+    # A malformed ?days= value must fall back to the default, not 500.
+    try:
+        days = int(request.args.get("days", "28") or "28")
+    except ValueError:
+        days = 28
+    days = max(1, min(days, 730))
+    source = request.args.get("source", "") or None
+
+    queries = await get_top_queries(date_range_days=days, source=source)
+    pages = await get_top_pages(date_range_days=days, source=source)
+    countries = await get_country_breakdown(date_range_days=days)
+    devices = await get_device_breakdown(date_range_days=days)
+
+    return await render_template(
+        "admin/partials/seo_search.html",
+        queries=queries,
+        pages=pages,
+        countries=countries,
+        devices=devices,
+        date_range_days=days,
+        current_source=source,
+    )
+
+
+@bp.route("/seo/funnel")
+@role_required("admin")
+async def seo_funnel():
+    """HTMX partial: full funnel view.
+
+    Query params:
+        days: lookback window (default 28, clamped to 1-730).
+    """
+    from ..seo import get_funnel_metrics
+
+    # A malformed ?days= value must fall back to the default, not 500.
+    try:
+        days = int(request.args.get("days", "28") or "28")
+    except ValueError:
+        days = 28
+    days = max(1, min(days, 730))
+    funnel = await get_funnel_metrics(date_range_days=days)
+
+    return await render_template(
+        "admin/partials/seo_funnel.html",
+        funnel=funnel,
+        date_range_days=days,
+    )
+
+
+@bp.route("/seo/scorecard")
+@role_required("admin")
+async def seo_scorecard():
+    """HTMX partial: article scorecard.
+
+    Query params: days, template_slug, country, language, sort, dir.
+    """
+    from ..seo import get_article_scorecard
+
+    # Harden request-supplied values at the boundary: a non-numeric ?days=
+    # must not 500, and `dir` is normalized here because downstream code
+    # interpolates it into SQL (its only guard there is an `assert`, which
+    # is stripped under `python -O`).
+    try:
+        days = int(request.args.get("days", "28") or "28")
+    except ValueError:
+        days = 28
+    days = max(1, min(days, 730))
+    template_slug = request.args.get("template_slug", "") or None
+    country_filter = request.args.get("country", "") or None
+    language = request.args.get("language", "") or None
+    sort_by = request.args.get("sort", "impressions")
+    sort_dir = request.args.get("dir", "desc")
+    if sort_dir not in ("asc", "desc"):
+        sort_dir = "desc"
+
+    scorecard = await get_article_scorecard(
+        date_range_days=days,
+        template_slug=template_slug,
+        country=country_filter,
+        language=language,
+        sort_by=sort_by,
+        sort_dir=sort_dir,
+    )
+
+    return await render_template(
+        "admin/partials/seo_scorecard.html",
+        scorecard=scorecard,
+        date_range_days=days,
+        current_template=template_slug,
+        current_country=country_filter,
+        current_language=language,
+        current_sort=sort_by,
+        current_dir=sort_dir,
+    )
+
+
+@bp.route("/seo/sync", methods=["POST"])
+@role_required("admin")
+@csrf_protect
+async def seo_sync_now():
+    """Manually trigger SEO data sync for one source, or all of them."""
+    from ..worker import enqueue
+
+    form = await request.form
+    source = form.get("source", "all")
+
+    known_sources = ("gsc", "bing", "umami")
+    if source == "all":
+        # Queue every source in a fixed order.
+        for name in known_sources:
+            await enqueue(f"sync_{name}")
+        await flash("All SEO syncs queued.", "success")
+    elif source in known_sources:
+        await enqueue(f"sync_{source}")
+        await flash(f"{source.upper()} sync queued.", "success")
+    else:
+        await flash("Unknown source.", "error")
+
+    return redirect(url_for("admin.seo"))
diff --git a/web/src/padelnomics/admin/templates/admin/base_admin.html b/web/src/padelnomics/admin/templates/admin/base_admin.html
index d826e97..80f6730 100644
--- a/web/src/padelnomics/admin/templates/admin/base_admin.html
+++ b/web/src/padelnomics/admin/templates/admin/base_admin.html
@@ -86,6 +86,12 @@
Templates
+
+
diff --git a/web/src/padelnomics/admin/templates/admin/partials/seo_funnel.html b/web/src/padelnomics/admin/templates/admin/partials/seo_funnel.html
new file mode 100644
index 0000000..ece0800
--- /dev/null
+++ b/web/src/padelnomics/admin/templates/admin/partials/seo_funnel.html
@@ -0,0 +1,96 @@
+
+
+
+{% set max_val = [funnel.impressions, funnel.clicks, funnel.pageviews, funnel.visitors, funnel.planner_users, funnel.leads] | max or 1 %}
+
+
+
Search (GSC + Bing)
+
+
+
ImpressionsSearch results shown
+
+
+ {{ "{:,}".format(funnel.impressions | int) }}
+
+
+
+
+
ClicksCTR: {{ "%.1f" | format(funnel.ctr * 100) }}%
+
+
+ {{ "{:,}".format(funnel.clicks | int) }}
+
+
+
+
Analytics (Umami)
+
+
+
Pageviews{% if funnel.clicks %}{{ "%.0f" | format(funnel.click_to_view * 100) }}% of clicks{% endif %}
+
+
+ {{ "{:,}".format(funnel.pageviews | int) }}
+
+
+
+
+
VisitorsUnique
+
+
+ {{ "{:,}".format(funnel.visitors | int) }}
+
+
+
+
Conversions (App)
+
+
+
Planner Users{% if funnel.visitors %}{{ "%.1f" | format(funnel.visitor_to_planner * 100) }}% of visitors{% endif %}
+
+
+ {{ "{:,}".format(funnel.planner_users | int) }}
+
+
+
+
+
Lead Requests{% if funnel.planner_users %}{{ "%.1f" | format(funnel.planner_to_lead * 100) }}% of planners{% endif %}
+
+
+ {{ "{:,}".format(funnel.leads | int) }}
+
+
+
+
+{% if not funnel.impressions and not funnel.pageviews and not funnel.planner_users %}
+
+
No funnel data yet. Run a sync to populate search and analytics metrics.
+
+{% endif %}
diff --git a/web/src/padelnomics/admin/templates/admin/partials/seo_scorecard.html b/web/src/padelnomics/admin/templates/admin/partials/seo_scorecard.html
new file mode 100644
index 0000000..49071e8
--- /dev/null
+++ b/web/src/padelnomics/admin/templates/admin/partials/seo_scorecard.html
@@ -0,0 +1,104 @@
+
+
+
+
+
+{% if scorecard %}
+
+
+
+
+ | Title |
+ Impressions |
+ Clicks |
+ CTR |
+ Pos |
+ Views |
+ Bounce |
+ Published |
+ Flags |
+
+
+
+ {% for a in scorecard %}
+
+
+ {{ a.title or a.url_path }}
+ {% if a.template_slug %}
+ {{ a.template_slug }}
+ {% endif %}
+ |
+ {{ "{:,}".format(a.impressions | int) }} |
+ {{ "{:,}".format(a.clicks | int) }} |
+ {{ "%.1f" | format((a.ctr or 0) * 100) }}% |
+ {{ "%.1f" | format(a.position_avg or 0) }} |
+ {{ "{:,}".format(a.pageviews | int) }} |
+
+ {% if a.bounce_rate is not none %}{{ "%.0f" | format(a.bounce_rate * 100) }}%{% else %}-{% endif %}
+ |
+ {{ a.published_at[:10] if a.published_at else '-' }} |
+
+ {% if a.flag_low_ctr %}
+ Low CTR
+ {% endif %}
+ {% if a.flag_no_clicks %}
+ No Clicks
+ {% endif %}
+ |
+
+ {% endfor %}
+
+
+
+{{ scorecard | length }} articles shown
+{% else %}
+
+
No published articles match the current filters, or no search/analytics data synced yet.
+
+{% endif %}
diff --git a/web/src/padelnomics/admin/templates/admin/partials/seo_search.html b/web/src/padelnomics/admin/templates/admin/partials/seo_search.html
new file mode 100644
index 0000000..9499a30
--- /dev/null
+++ b/web/src/padelnomics/admin/templates/admin/partials/seo_search.html
@@ -0,0 +1,132 @@
+
+
+
+
+
+
+
+
+
+
+ Top Queries
+ {% if queries %}
+
+
+
+
+ | Query |
+ Impressions |
+ Clicks |
+ CTR |
+ Pos |
+
+
+
+ {% for q in queries[:20] %}
+
+ | {{ q.query }} |
+ {{ "{:,}".format(q.impressions | int) }} |
+ {{ "{:,}".format(q.clicks | int) }} |
+ {{ "%.1f" | format((q.ctr or 0) * 100) }}% |
+ {{ "%.1f" | format(q.position_avg or 0) }} |
+
+ {% endfor %}
+
+
+
+ {% else %}
+
+
No query data yet. Run a sync to populate.
+
+ {% endif %}
+
+
+
+
+ Top Pages
+ {% if pages %}
+
+
+
+
+ | Page |
+ Impressions |
+ Clicks |
+ CTR |
+ Pos |
+
+
+
+ {% for p in pages[:20] %}
+
+ | {{ p.page_url }} |
+ {{ "{:,}".format(p.impressions | int) }} |
+ {{ "{:,}".format(p.clicks | int) }} |
+ {{ "%.1f" | format((p.ctr or 0) * 100) }}% |
+ {{ "%.1f" | format(p.position_avg or 0) }} |
+
+ {% endfor %}
+
+
+
+ {% else %}
+
+ {% endif %}
+
+
+
+
+
+
+ By Country
+ {% if countries %}
+
+
+ | Country | Impressions | Clicks |
+
+ {% for c in countries[:15] %}
+
+ | {{ c.country | upper }} |
+ {{ "{:,}".format(c.impressions | int) }} |
+ {{ "{:,}".format(c.clicks | int) }} |
+
+ {% endfor %}
+
+
+
+ {% else %}
+
+ {% endif %}
+
+
+
+
+ By Device (GSC)
+ {% if devices %}
+
+
+ | Device | Impressions | Clicks |
+
+ {% for d in devices %}
+
+ | {{ d.device | capitalize }} |
+ {{ "{:,}".format(d.impressions | int) }} |
+ {{ "{:,}".format(d.clicks | int) }} |
+
+ {% endfor %}
+
+
+
+ {% else %}
+ No device data (GSC only).
+ {% endif %}
+
+
diff --git a/web/src/padelnomics/admin/templates/admin/seo.html b/web/src/padelnomics/admin/templates/admin/seo.html
new file mode 100644
index 0000000..0b0f295
--- /dev/null
+++ b/web/src/padelnomics/admin/templates/admin/seo.html
@@ -0,0 +1,149 @@
+{% extends "admin/base_admin.html" %}
+{% set admin_page = "seo" %}
+{% block title %}SEO Hub - Admin - {{ config.APP_NAME }}{% endblock %}
+
+{% block admin_head %}
+
+{% endblock %}
+
+{% block admin_content %}
+
+
+
+
+ Last sync:
+ {% for s in sync_status %}
+
+ {{ s.source | upper }}
+ {% if s.status == 'success' %}
+ {{ s.completed_at[:16] if s.completed_at else '' }} ({{ s.rows_synced }} rows)
+ {% elif s.status == 'failed' %}
+ failed
+ {% endif %}
+
+ {% endfor %}
+ {% if not sync_status %}
+ No syncs yet
+ {% endif %}
+
+
+
+
+
+ {% for d, label in [(7, '7d'), (28, '28d'), (90, '3m'), (180, '6m'), (365, '12m')] %}
+
+ {% endfor %}
+
+
+
+
+
+
+
Impressions
+
{{ "{:,}".format(overview.total_impressions | int) }}
+
+
+
Clicks
+
{{ "{:,}".format(overview.total_clicks | int) }}
+
+
+
Avg CTR
+
{{ "%.1f" | format(overview.avg_ctr * 100) }}%
+
+
+
Avg Position
+
{{ "%.1f" | format(overview.avg_position) }}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{% endblock %}
diff --git a/web/src/padelnomics/core.py b/web/src/padelnomics/core.py
index 53aa510..b85284f 100644
--- a/web/src/padelnomics/core.py
+++ b/web/src/padelnomics/core.py
@@ -51,7 +51,13 @@ class Config:
UMAMI_API_URL: str = os.getenv("UMAMI_API_URL", "https://umami.padelnomics.io")
UMAMI_API_TOKEN: str = os.getenv("UMAMI_API_TOKEN", "")
UMAMI_WEBSITE_ID: str = "4474414b-58d6-4c6e-89a1-df5ea1f49d70"
-
+
+ # SEO metrics sync
+ GSC_SERVICE_ACCOUNT_PATH: str = os.getenv("GSC_SERVICE_ACCOUNT_PATH", "")
+ GSC_SITE_URL: str = os.getenv("GSC_SITE_URL", "")
+ BING_WEBMASTER_API_KEY: str = os.getenv("BING_WEBMASTER_API_KEY", "")
+ BING_SITE_URL: str = os.getenv("BING_SITE_URL", "")
+
RESEND_API_KEY: str = os.getenv("RESEND_API_KEY", "")
EMAIL_FROM: str = _env("EMAIL_FROM", "hello@padelnomics.io")
LEADS_EMAIL: str = _env("LEADS_EMAIL", "leads@padelnomics.io")
diff --git a/web/src/padelnomics/migrations/versions/0019_add_seo_metrics.py b/web/src/padelnomics/migrations/versions/0019_add_seo_metrics.py
new file mode 100644
index 0000000..aea6400
--- /dev/null
+++ b/web/src/padelnomics/migrations/versions/0019_add_seo_metrics.py
@@ -0,0 +1,84 @@
+"""Add SEO metrics tables for GSC, Bing, and Umami data sync.
+
+Three tables:
+ - seo_search_metrics — daily search data per page+query (GSC + Bing)
+ - seo_analytics_metrics — daily page analytics (Umami)
+ - seo_sync_log — tracks sync state per source
+"""
+
+
+def up(conn):
+    """Create the three SEO metrics tables and their indexes.
+
+    All DDL uses IF NOT EXISTS, so re-running the migration is a no-op.
+
+    Args:
+        conn: open SQLite connection with an ``execute`` method.
+    """
+    # ── 1. Search metrics (GSC + Bing) ─────────────────────────────────
+    conn.execute("""
+        CREATE TABLE IF NOT EXISTS seo_search_metrics (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            source TEXT NOT NULL,
+            metric_date TEXT NOT NULL,
+            page_url TEXT NOT NULL,
+            query TEXT,
+            country TEXT,
+            device TEXT,
+            clicks INTEGER NOT NULL DEFAULT 0,
+            impressions INTEGER NOT NULL DEFAULT 0,
+            ctr REAL,
+            position_avg REAL,
+            created_at TEXT NOT NULL DEFAULT (datetime('now'))
+        )
+    """)
+    # COALESCE converts NULLs to '' for unique index (SQLite treats
+    # NULL as distinct in UNIQUE constraints, causing duplicate rows)
+    conn.execute("""
+        CREATE UNIQUE INDEX IF NOT EXISTS idx_seo_search_dedup
+        ON seo_search_metrics(
+            source, metric_date, page_url,
+            COALESCE(query, ''), COALESCE(country, ''), COALESCE(device, '')
+        )
+    """)
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_seo_search_date"
+        " ON seo_search_metrics(metric_date)"
+    )
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_seo_search_page"
+        " ON seo_search_metrics(page_url)"
+    )
+
+    # ── 2. Analytics metrics (Umami) ───────────────────────────────────
+    conn.execute("""
+        CREATE TABLE IF NOT EXISTS seo_analytics_metrics (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            metric_date TEXT NOT NULL,
+            page_url TEXT NOT NULL,
+            pageviews INTEGER NOT NULL DEFAULT 0,
+            visitors INTEGER NOT NULL DEFAULT 0,
+            bounce_rate REAL,
+            time_avg_seconds INTEGER,
+            created_at TEXT NOT NULL DEFAULT (datetime('now'))
+        )
+    """)
+    conn.execute("""
+        CREATE UNIQUE INDEX IF NOT EXISTS idx_seo_analytics_dedup
+        ON seo_analytics_metrics(metric_date, page_url)
+    """)
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_seo_analytics_date"
+        " ON seo_analytics_metrics(metric_date)"
+    )
+
+    # ── 3. Sync log ────────────────────────────────────────────────────
+    conn.execute("""
+        CREATE TABLE IF NOT EXISTS seo_sync_log (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            source TEXT NOT NULL,
+            status TEXT NOT NULL,
+            rows_synced INTEGER NOT NULL DEFAULT 0,
+            error TEXT,
+            started_at TEXT NOT NULL,
+            completed_at TEXT,
+            duration_ms INTEGER
+        )
+    """)
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_seo_sync_source"
+        " ON seo_sync_log(source, started_at)"
+    )
diff --git a/web/src/padelnomics/seo/__init__.py b/web/src/padelnomics/seo/__init__.py
new file mode 100644
index 0000000..40a4e2b
--- /dev/null
+++ b/web/src/padelnomics/seo/__init__.py
@@ -0,0 +1,36 @@
+"""
+SEO metrics sync and query module.
+
+Syncs data from Google Search Console, Bing Webmaster Tools, and Umami
+into SQLite tables. Query functions support the admin SEO hub views.
+"""
+
+from ._bing import sync_bing
+from ._gsc import sync_gsc
+from ._queries import (
+ cleanup_old_metrics,
+ get_article_scorecard,
+ get_country_breakdown,
+ get_device_breakdown,
+ get_funnel_metrics,
+ get_search_performance,
+ get_sync_status,
+ get_top_pages,
+ get_top_queries,
+)
+from ._umami import sync_umami
+
+__all__ = [
+ "sync_gsc",
+ "sync_bing",
+ "sync_umami",
+ "get_search_performance",
+ "get_top_queries",
+ "get_top_pages",
+ "get_country_breakdown",
+ "get_device_breakdown",
+ "get_funnel_metrics",
+ "get_article_scorecard",
+ "get_sync_status",
+ "cleanup_old_metrics",
+]
diff --git a/web/src/padelnomics/seo/_bing.py b/web/src/padelnomics/seo/_bing.py
new file mode 100644
index 0000000..8effc5a
--- /dev/null
+++ b/web/src/padelnomics/seo/_bing.py
@@ -0,0 +1,143 @@
+"""Bing Webmaster Tools sync via REST API.
+
+Uses an API key for auth. Fetches query stats and page stats.
+"""
+
+from datetime import datetime, timedelta
+from urllib.parse import urlparse
+
+import httpx
+
+from ..core import config, execute
+
+
+_TIMEOUT_SECONDS = 30
+
+
+def _normalize_url(full_url: str) -> str:
+    """Return only the path component of *full_url* ('/' when empty)."""
+    return urlparse(full_url).path or "/"
+
+
+def _parse_bing_date(date_str: str):
+    """Parse Bing's '/Date(1708905600000)/' wrapper into a naive UTC datetime.
+
+    The value is milliseconds since the epoch; some Bing API responses append
+    a '+hhmm'/'-hhmm' timezone suffix inside the parentheses, which the old
+    inline parsing crashed on. The epoch value is already UTC, so the suffix
+    is dropped. Returns None when the string is unparseable.
+    """
+    if "/Date(" not in date_str:
+        return None
+    raw = date_str.split("(")[1].split(")")[0]
+    # Take the leading run of digits; ignore any trailing offset suffix.
+    i = 0
+    while i < len(raw) and raw[i].isdigit():
+        i += 1
+    if i == 0:
+        return None
+    return datetime.utcfromtimestamp(int(raw[:i]) / 1000)
+
+
+async def sync_bing(days_back: int = 3, timeout_seconds: int = _TIMEOUT_SECONDS) -> int:
+    """Sync Bing Webmaster query stats into seo_search_metrics. Returns rows synced.
+
+    Args:
+        days_back: how many days of history to keep (1-90).
+        timeout_seconds: HTTP timeout per request (1-120).
+
+    Raises:
+        ValueError: on out-of-range arguments.
+        Exception: API/network failures are logged to seo_sync_log, then re-raised.
+    """
+    # Explicit validation — `assert` is stripped under `python -O`.
+    if not 1 <= days_back <= 90:
+        raise ValueError("days_back must be 1-90")
+    if not 1 <= timeout_seconds <= 120:
+        raise ValueError("timeout_seconds must be 1-120")
+
+    if not config.BING_WEBMASTER_API_KEY or not config.BING_SITE_URL:
+        return 0  # Bing not configured — skip silently
+
+    started_at = datetime.utcnow()
+
+    try:
+        rows_synced = 0
+        async with httpx.AsyncClient(timeout=timeout_seconds) as client:
+            # Fetch query stats (Bing returns its full history; we filter below)
+            response = await client.get(
+                "https://ssl.bing.com/webmaster/api.svc/json/GetQueryStats",
+                params={
+                    "apikey": config.BING_WEBMASTER_API_KEY,
+                    "siteUrl": config.BING_SITE_URL,
+                },
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            # Bing returns {"d": [{"Query": ..., "Date": ..., ...}, ...]}
+            entries = data.get("d", []) if isinstance(data, dict) else data
+            if not isinstance(entries, list):
+                entries = []
+
+            cutoff = datetime.utcnow() - timedelta(days=days_back)
+
+            for entry in entries:
+                entry_date = _parse_bing_date(entry.get("Date", ""))
+                if entry_date is None or entry_date < cutoff:
+                    continue
+
+                metric_date = entry_date.strftime("%Y-%m-%d")
+                query = entry.get("Query", "")
+
+                # Query stats have no page dimension — stored under '/'.
+                await execute(
+                    """INSERT OR REPLACE INTO seo_search_metrics
+                       (source, metric_date, page_url, query, country, device,
+                        clicks, impressions, ctr, position_avg)
+                       VALUES ('bing', ?, '/', ?, NULL, NULL, ?, ?, ?, ?)""",
+                    (
+                        metric_date, query,
+                        entry.get("Clicks", 0),
+                        entry.get("Impressions", 0),
+                        entry.get("AvgCTR", 0.0),
+                        entry.get("AvgClickPosition", 0.0),
+                    ),
+                )
+                rows_synced += 1
+
+            # Also fetch page-level stats
+            page_response = await client.get(
+                "https://ssl.bing.com/webmaster/api.svc/json/GetPageStats",
+                params={
+                    "apikey": config.BING_WEBMASTER_API_KEY,
+                    "siteUrl": config.BING_SITE_URL,
+                },
+            )
+            page_response.raise_for_status()
+            page_data = page_response.json()
+
+            page_entries = page_data.get("d", []) if isinstance(page_data, dict) else page_data
+            if not isinstance(page_entries, list):
+                page_entries = []
+
+            for entry in page_entries:
+                entry_date = _parse_bing_date(entry.get("Date", ""))
+                if entry_date is None or entry_date < cutoff:
+                    continue
+
+                metric_date = entry_date.strftime("%Y-%m-%d")
+                page_url = _normalize_url(entry.get("Url", "/"))
+
+                # Page stats have no query dimension — stored with query=''.
+                await execute(
+                    """INSERT OR REPLACE INTO seo_search_metrics
+                       (source, metric_date, page_url, query, country, device,
+                        clicks, impressions, ctr, position_avg)
+                       VALUES ('bing', ?, ?, '', NULL, NULL, ?, ?, NULL, NULL)""",
+                    (
+                        metric_date, page_url,
+                        entry.get("Clicks", 0),
+                        entry.get("Impressions", 0),
+                    ),
+                )
+                rows_synced += 1
+
+        duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
+        await execute(
+            """INSERT INTO seo_sync_log
+               (source, status, rows_synced, started_at, completed_at, duration_ms)
+               VALUES ('bing', 'success', ?, ?, ?, ?)""",
+            (rows_synced, started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
+        )
+        return rows_synced
+
+    except Exception as exc:
+        duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
+        await execute(
+            """INSERT INTO seo_sync_log
+               (source, status, rows_synced, error, started_at, completed_at, duration_ms)
+               VALUES ('bing', 'failed', 0, ?, ?, ?, ?)""",
+            (str(exc), started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
+        )
+        raise
diff --git a/web/src/padelnomics/seo/_gsc.py b/web/src/padelnomics/seo/_gsc.py
new file mode 100644
index 0000000..9753160
--- /dev/null
+++ b/web/src/padelnomics/seo/_gsc.py
@@ -0,0 +1,144 @@
+"""Google Search Console sync via Search Analytics API.
+
+Uses a service account JSON key file for auth. The google-api-python-client
+is synchronous, so sync runs in asyncio.to_thread().
+"""
+
+import asyncio
+import time
+from datetime import datetime, timedelta
+from pathlib import Path
+from urllib.parse import urlparse
+
+from ..core import config, execute
+
+
+# GSC returns max 25K rows per request
+_ROWS_PER_PAGE = 25_000
+
+
+def _fetch_gsc_data(
+    start_date: str,
+    end_date: str,
+    max_pages: int,
+) -> list[dict]:
+    """Synchronous GSC fetch — called via asyncio.to_thread().
+
+    Pages through the Search Analytics API via startRow offsets, up to
+    ``max_pages`` requests of ``_ROWS_PER_PAGE`` rows each.
+
+    Returns list of dicts with keys: date, page, query, country, device,
+    clicks, impressions, ctr, position.
+
+    Raises:
+        AssertionError: if the service-account key file does not exist.
+    """
+    # Imported lazily: only runs when GSC is actually configured.
+    from google.oauth2.service_account import Credentials
+    from googleapiclient.discovery import build
+
+    key_path = Path(config.GSC_SERVICE_ACCOUNT_PATH)
+    assert key_path.exists(), f"GSC service account key not found: {key_path}"
+
+    credentials = Credentials.from_service_account_file(
+        str(key_path),
+        scopes=["https://www.googleapis.com/auth/webmasters.readonly"],
+    )
+    service = build("searchconsole", "v1", credentials=credentials)
+
+    all_rows = []
+    start_row = 0
+
+    for _page_num in range(max_pages):
+        body = {
+            "startDate": start_date,
+            "endDate": end_date,
+            # This order determines the order of row["keys"] unpacked below.
+            "dimensions": ["date", "page", "query", "country", "device"],
+            "rowLimit": _ROWS_PER_PAGE,
+            "startRow": start_row,
+        }
+        response = service.searchanalytics().query(
+            siteUrl=config.GSC_SITE_URL,
+            body=body,
+        ).execute()
+
+        rows = response.get("rows", [])
+        if not rows:
+            break
+
+        for row in rows:
+            keys = row["keys"]
+            all_rows.append({
+                "date": keys[0],
+                "page": keys[1],
+                "query": keys[2],
+                "country": keys[3],
+                "device": keys[4],
+                "clicks": row.get("clicks", 0),
+                "impressions": row.get("impressions", 0),
+                "ctr": row.get("ctr", 0.0),
+                "position": row.get("position", 0.0),
+            })
+
+        # A short page means the result set is exhausted.
+        if len(rows) < _ROWS_PER_PAGE:
+            break
+        start_row += _ROWS_PER_PAGE
+
+    return all_rows
+
+
+def _normalize_url(full_url: str) -> str:
+    """Return only the path component of *full_url* (no scheme or domain).
+
+    Example: 'https://padelnomics.io/en/markets/germany/berlin'
+    → '/en/markets/germany/berlin'. Empty paths become '/'.
+    """
+    return urlparse(full_url).path or "/"
+
+
+async def sync_gsc(days_back: int = 3, max_pages: int = 10) -> int:
+    """Sync GSC search analytics into seo_search_metrics. Returns rows synced.
+
+    Args:
+        days_back: days of history to fetch (1-90).
+        max_pages: API pagination cap, 25K rows per page (1-20).
+
+    Raises:
+        ValueError: on out-of-range arguments.
+        Exception: API/auth failures are logged to seo_sync_log, then re-raised.
+    """
+    # Explicit validation — `assert` is stripped under `python -O`.
+    if not 1 <= days_back <= 90:
+        raise ValueError("days_back must be 1-90")
+    if not 1 <= max_pages <= 20:
+        raise ValueError("max_pages must be 1-20")
+
+    if not config.GSC_SERVICE_ACCOUNT_PATH or not config.GSC_SITE_URL:
+        return 0  # GSC not configured — skip silently
+
+    started_at = datetime.utcnow()
+
+    # GSC has ~2 day delay; fetch from days_back ago to 2 days ago
+    end_date = (datetime.utcnow() - timedelta(days=2)).strftime("%Y-%m-%d")
+    start_date = (datetime.utcnow() - timedelta(days=days_back + 2)).strftime("%Y-%m-%d")
+
+    try:
+        # google-api-python-client is synchronous; run off the event loop.
+        rows = await asyncio.to_thread(
+            _fetch_gsc_data, start_date, end_date, max_pages,
+        )
+
+        rows_synced = 0
+        for row in rows:
+            page_url = _normalize_url(row["page"])
+            await execute(
+                """INSERT OR REPLACE INTO seo_search_metrics
+                   (source, metric_date, page_url, query, country, device,
+                    clicks, impressions, ctr, position_avg)
+                   VALUES ('gsc', ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                (
+                    row["date"], page_url, row["query"], row["country"],
+                    row["device"], row["clicks"], row["impressions"],
+                    row["ctr"], row["position"],
+                ),
+            )
+            rows_synced += 1
+
+        duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
+        await execute(
+            """INSERT INTO seo_sync_log
+               (source, status, rows_synced, started_at, completed_at, duration_ms)
+               VALUES ('gsc', 'success', ?, ?, ?, ?)""",
+            (rows_synced, started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
+        )
+        return rows_synced
+
+    except Exception as exc:
+        duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
+        await execute(
+            """INSERT INTO seo_sync_log
+               (source, status, rows_synced, error, started_at, completed_at, duration_ms)
+               VALUES ('gsc', 'failed', 0, ?, ?, ?, ?)""",
+            (str(exc), started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
+        )
+        raise
diff --git a/web/src/padelnomics/seo/_queries.py b/web/src/padelnomics/seo/_queries.py
new file mode 100644
index 0000000..94434c0
--- /dev/null
+++ b/web/src/padelnomics/seo/_queries.py
@@ -0,0 +1,379 @@
+"""SQL query functions for the admin SEO hub views.
+
+All heavy lifting happens in SQL. Functions accept filter parameters
+and return plain dicts/lists.
+"""
+
+from datetime import datetime, timedelta
+
+from ..core import execute, fetch_all, fetch_one
+
+
+def _date_cutoff(date_range_days: int) -> str:
+    """Return the ISO date (YYYY-MM-DD) exactly N days before now (UTC)."""
+    cutoff = datetime.utcnow() - timedelta(days=date_range_days)
+    return cutoff.strftime("%Y-%m-%d")
+
+
+async def get_search_performance(
+    date_range_days: int = 28,
+    source: str | None = None,
+) -> dict:
+    """Aggregate search performance: total clicks, impressions, avg CTR, avg position.
+
+    Args:
+        date_range_days: lookback window in days (1-730).
+        source: optional exact-match source filter; None aggregates all sources.
+
+    Raises:
+        ValueError: if date_range_days is out of range.
+    """
+    # Explicit check — `assert` is stripped under `python -O`.
+    if not 1 <= date_range_days <= 730:
+        raise ValueError("date_range_days must be 1-730")
+
+    cutoff = _date_cutoff(date_range_days)
+    source_filter = "AND source = ?" if source else ""
+    params = [cutoff]
+    if source:
+        params.append(source)
+
+    # Position is impression-weighted so high-traffic rows dominate the avg.
+    row = await fetch_one(
+        f"""SELECT
+            COALESCE(SUM(clicks), 0) AS total_clicks,
+            COALESCE(SUM(impressions), 0) AS total_impressions,
+            CASE WHEN SUM(impressions) > 0
+                 THEN CAST(SUM(clicks) AS REAL) / SUM(impressions)
+                 ELSE 0 END AS avg_ctr,
+            CASE WHEN SUM(impressions) > 0
+                 THEN SUM(position_avg * impressions) / SUM(impressions)
+                 ELSE 0 END AS avg_position
+        FROM seo_search_metrics
+        WHERE metric_date >= ? {source_filter}""",
+        tuple(params),
+    )
+    return dict(row) if row else {
+        "total_clicks": 0, "total_impressions": 0,
+        "avg_ctr": 0, "avg_position": 0,
+    }
+
+
+async def get_top_queries(
+    date_range_days: int = 28,
+    source: str | None = None,
+    limit: int = 50,
+) -> list[dict]:
+    """Top queries by impressions with clicks, CTR, avg position."""
+    assert 1 <= date_range_days <= 730
+    assert 1 <= limit <= 500
+
+    # Bind values are assembled in SQL-placeholder order: cutoff,
+    # optional source, then the LIMIT.
+    bind_values: list = [_date_cutoff(date_range_days)]
+    source_filter = ""
+    if source:
+        source_filter = "AND source = ?"
+        bind_values.append(source)
+    bind_values.append(limit)
+
+    query_rows = await fetch_all(
+        f"""SELECT
+            query,
+            SUM(clicks) AS clicks,
+            SUM(impressions) AS impressions,
+            CASE WHEN SUM(impressions) > 0
+                 THEN CAST(SUM(clicks) AS REAL) / SUM(impressions)
+                 ELSE 0 END AS ctr,
+            CASE WHEN SUM(impressions) > 0
+                 THEN SUM(position_avg * impressions) / SUM(impressions)
+                 ELSE 0 END AS position_avg
+        FROM seo_search_metrics
+        WHERE metric_date >= ?
+          AND query IS NOT NULL AND query != ''
+          {source_filter}
+        GROUP BY query
+        ORDER BY impressions DESC
+        LIMIT ?""",
+        tuple(bind_values),
+    )
+    return list(map(dict, query_rows))
+
+
+async def get_top_pages(
+    date_range_days: int = 28,
+    source: str | None = None,
+    limit: int = 50,
+) -> list[dict]:
+    """Top pages by impressions with clicks, CTR, avg position."""
+    assert 1 <= date_range_days <= 730
+    assert 1 <= limit <= 500
+
+    # Placeholder order: cutoff date, optional source, LIMIT.
+    bind_values: list = [_date_cutoff(date_range_days)]
+    source_filter = ""
+    if source:
+        source_filter = "AND source = ?"
+        bind_values.append(source)
+    bind_values.append(limit)
+
+    page_rows = await fetch_all(
+        f"""SELECT
+            page_url,
+            SUM(clicks) AS clicks,
+            SUM(impressions) AS impressions,
+            CASE WHEN SUM(impressions) > 0
+                 THEN CAST(SUM(clicks) AS REAL) / SUM(impressions)
+                 ELSE 0 END AS ctr,
+            CASE WHEN SUM(impressions) > 0
+                 THEN SUM(position_avg * impressions) / SUM(impressions)
+                 ELSE 0 END AS position_avg
+        FROM seo_search_metrics
+        WHERE metric_date >= ?
+          {source_filter}
+        GROUP BY page_url
+        ORDER BY impressions DESC
+        LIMIT ?""",
+        tuple(bind_values),
+    )
+    return list(map(dict, page_rows))
+
+
+async def get_country_breakdown(
+    date_range_days: int = 28,
+) -> list[dict]:
+    """Per-country clicks and impressions, top 50 by impressions."""
+    assert 1 <= date_range_days <= 730
+
+    country_rows = await fetch_all(
+        """SELECT
+            country,
+            SUM(clicks) AS clicks,
+            SUM(impressions) AS impressions
+        FROM seo_search_metrics
+        WHERE metric_date >= ?
+          AND country IS NOT NULL AND country != ''
+        GROUP BY country
+        ORDER BY impressions DESC
+        LIMIT 50""",
+        (_date_cutoff(date_range_days),),
+    )
+    return list(map(dict, country_rows))
+
+
+async def get_device_breakdown(
+    date_range_days: int = 28,
+) -> list[dict]:
+    """Clicks and impressions by device type (only GSC provides devices)."""
+    assert 1 <= date_range_days <= 730
+
+    device_rows = await fetch_all(
+        """SELECT
+            device,
+            SUM(clicks) AS clicks,
+            SUM(impressions) AS impressions
+        FROM seo_search_metrics
+        WHERE metric_date >= ?
+          AND source = 'gsc'
+          AND device IS NOT NULL AND device != ''
+        GROUP BY device
+        ORDER BY impressions DESC""",
+        (_date_cutoff(date_range_days),),
+    )
+    return list(map(dict, device_rows))
+
+
+async def get_funnel_metrics(
+    date_range_days: int = 28,
+) -> dict:
+    """Full funnel: search → analytics → conversions.
+
+    Combines search metrics (GSC/Bing), analytics (Umami), and
+    business metrics (planner users, leads) from SQLite.
+
+    Args:
+        date_range_days: lookback window in days (1-730).
+
+    Returns:
+        Dict of absolute stage counts plus stage-to-stage conversion
+        ratios (each ratio is 0 when its denominator stage is empty).
+    """
+    assert 1 <= date_range_days <= 730
+
+    cutoff = _date_cutoff(date_range_days)
+
+    # Search layer
+    search = await fetch_one(
+        """SELECT
+            COALESCE(SUM(impressions), 0) AS impressions,
+            COALESCE(SUM(clicks), 0) AS clicks
+        FROM seo_search_metrics
+        WHERE metric_date >= ?""",
+        (cutoff,),
+    )
+
+    # Analytics layer — the root path is excluded so only content pages count
+    analytics = await fetch_one(
+        """SELECT
+            COALESCE(SUM(pageviews), 0) AS pageviews,
+            COALESCE(SUM(visitors), 0) AS visitors
+        FROM seo_analytics_metrics
+        WHERE metric_date >= ?
+          AND page_url != '/'""",
+        (cutoff,),
+    )
+
+    # Business layer (from existing SQLite tables)
+    # NOTE(review): compares created_at against a YYYY-MM-DD cutoff
+    # lexicographically — assumes created_at is ISO-8601 text; confirm.
+    planner_users = await fetch_one(
+        """SELECT COUNT(DISTINCT user_id) AS cnt
+        FROM scenarios
+        WHERE deleted_at IS NULL
+          AND created_at >= ?""",
+        (cutoff,),
+    )
+
+    leads = await fetch_one(
+        """SELECT COUNT(*) AS cnt
+        FROM lead_requests
+        WHERE lead_type = 'quote'
+          AND created_at >= ?""",
+        (cutoff,),
+    )
+
+    imp = search["impressions"] if search else 0
+    clicks = search["clicks"] if search else 0
+    pvs = analytics["pageviews"] if analytics else 0
+    vis = analytics["visitors"] if analytics else 0
+    planners = planner_users["cnt"] if planner_users else 0
+    lead_count = leads["cnt"] if leads else 0
+
+    return {
+        "impressions": imp,
+        "clicks": clicks,
+        "pageviews": pvs,
+        "visitors": vis,
+        "planner_users": planners,
+        "leads": lead_count,
+        # Conversion rates between stages
+        "ctr": clicks / imp if imp > 0 else 0,
+        "click_to_view": pvs / clicks if clicks > 0 else 0,
+        "view_to_visitor": vis / pvs if pvs > 0 else 0,
+        "visitor_to_planner": planners / vis if vis > 0 else 0,
+        "planner_to_lead": lead_count / planners if planners > 0 else 0,
+    }
+
+
+async def get_article_scorecard(
+    date_range_days: int = 28,
+    template_slug: str | None = None,
+    country: str | None = None,
+    language: str | None = None,
+    sort_by: str = "impressions",
+    sort_dir: str = "desc",
+    limit: int = 100,
+) -> list[dict]:
+    """Per-article scorecard joining articles + search + analytics metrics.
+
+    Returns article metadata enriched with search and analytics data,
+    plus attention flags for articles needing action.
+
+    Args:
+        date_range_days: lookback window in days (1-730).
+        template_slug / country / language: optional exact-match article filters.
+        sort_by: sort column; unknown values fall back to 'impressions'.
+        sort_dir: 'asc' or 'desc'; anything else falls back to 'desc'.
+        limit: max rows returned (1-500).
+
+    Raises:
+        ValueError: on out-of-range date_range_days or limit.
+    """
+    if not 1 <= date_range_days <= 730:
+        raise ValueError("date_range_days must be 1-730")
+    if not 1 <= limit <= 500:
+        raise ValueError("limit must be 1-500")
+
+    # Both sort_by and sort_dir are interpolated into the SQL below, so
+    # BOTH must be allowlisted with real code. An `assert` is not enough:
+    # asserts are stripped under `python -O`, which would let a
+    # request-supplied `dir` value flow straight into ORDER BY (SQL
+    # injection). Fall back to safe defaults instead of raising.
+    sort_columns = {
+        "impressions", "clicks", "ctr", "position_avg",
+        "pageviews", "title", "published_at",
+    }
+    if sort_by not in sort_columns:
+        sort_by = "impressions"
+    if sort_dir not in ("asc", "desc"):
+        sort_dir = "desc"
+
+    cutoff = _date_cutoff(date_range_days)
+
+    wheres = ["a.status = 'published'"]
+    params: list = [cutoff, cutoff]
+
+    if template_slug:
+        wheres.append("a.template_slug = ?")
+        params.append(template_slug)
+    if country:
+        wheres.append("a.country = ?")
+        params.append(country)
+    if language:
+        wheres.append("a.language = ?")
+        params.append(language)
+
+    where_clause = " AND ".join(wheres)
+    params.append(limit)
+
+    rows = await fetch_all(
+        f"""SELECT
+            a.id,
+            a.title,
+            a.url_path,
+            a.template_slug,
+            a.country,
+            a.language,
+            a.published_at,
+            COALESCE(s.impressions, 0) AS impressions,
+            COALESCE(s.clicks, 0) AS clicks,
+            COALESCE(s.ctr, 0) AS ctr,
+            COALESCE(s.position_avg, 0) AS position_avg,
+            COALESCE(u.pageviews, 0) AS pageviews,
+            COALESCE(u.visitors, 0) AS visitors,
+            u.bounce_rate,
+            u.time_avg_seconds,
+            -- Attention flags
+            CASE WHEN COALESCE(s.impressions, 0) > 100
+                  AND COALESCE(s.ctr, 0) < 0.02
+                 THEN 1 ELSE 0 END AS flag_low_ctr,
+            CASE WHEN COALESCE(s.clicks, 0) = 0
+                  AND a.published_at <= date('now', '-30 days')
+                 THEN 1 ELSE 0 END AS flag_no_clicks
+        FROM articles a
+        LEFT JOIN (
+            SELECT page_url,
+                   SUM(impressions) AS impressions,
+                   SUM(clicks) AS clicks,
+                   CASE WHEN SUM(impressions) > 0
+                        THEN CAST(SUM(clicks) AS REAL) / SUM(impressions)
+                        ELSE 0 END AS ctr,
+                   CASE WHEN SUM(impressions) > 0
+                        THEN SUM(position_avg * impressions) / SUM(impressions)
+                        ELSE 0 END AS position_avg
+            FROM seo_search_metrics
+            WHERE metric_date >= ?
+            GROUP BY page_url
+        ) s ON s.page_url = a.url_path
+        LEFT JOIN (
+            SELECT page_url,
+                   SUM(pageviews) AS pageviews,
+                   SUM(visitors) AS visitors,
+                   AVG(bounce_rate) AS bounce_rate,
+                   AVG(time_avg_seconds) AS time_avg_seconds
+            FROM seo_analytics_metrics
+            WHERE metric_date >= ?
+            GROUP BY page_url
+        ) u ON u.page_url = a.url_path
+        WHERE {where_clause}
+        ORDER BY {sort_by} {sort_dir}
+        LIMIT ?""",
+        tuple(params),
+    )
+    return [dict(r) for r in rows]
+
+
+async def get_sync_status() -> list[dict]:
+    """Return the most recent seo_sync_log entry for each source."""
+    # MAX(id) per source picks the latest row; id is AUTOINCREMENT.
+    latest_rows = await fetch_all(
+        """SELECT source, status, rows_synced, error,
+                  started_at, completed_at, duration_ms
+        FROM seo_sync_log
+        WHERE id IN (
+            SELECT MAX(id) FROM seo_sync_log GROUP BY source
+        )
+        ORDER BY source"""
+    )
+    return list(map(dict, latest_rows))
+
+
+async def cleanup_old_metrics(retention_days: int = 365) -> int:
+    """Delete metrics older than retention_days. Returns rows deleted.
+
+    Args:
+        retention_days: how long to keep search/analytics rows (30-1095).
+
+    Raises:
+        ValueError: if retention_days is out of range.
+    """
+    # Explicit check — `assert` is stripped under `python -O`.
+    if not 30 <= retention_days <= 1095:
+        raise ValueError("retention_days must be 30-1095")
+
+    cutoff = _date_cutoff(retention_days)
+
+    deleted_search = await execute(
+        "DELETE FROM seo_search_metrics WHERE metric_date < ?", (cutoff,)
+    )
+    deleted_analytics = await execute(
+        "DELETE FROM seo_analytics_metrics WHERE metric_date < ?", (cutoff,)
+    )
+    # Sync log is small operational data; keep only 30 days regardless of
+    # the metrics retention window.
+    sync_cutoff = _date_cutoff(30)
+    deleted_sync = await execute(
+        "DELETE FROM seo_sync_log WHERE started_at < ?", (sync_cutoff,)
+    )
+
+    # NOTE(review): assumes `execute` returns the affected-row count (or
+    # None) — confirm against the ..core implementation.
+    return (deleted_search or 0) + (deleted_analytics or 0) + (deleted_sync or 0)
diff --git a/web/src/padelnomics/seo/_umami.py b/web/src/padelnomics/seo/_umami.py
new file mode 100644
index 0000000..33a7083
--- /dev/null
+++ b/web/src/padelnomics/seo/_umami.py
@@ -0,0 +1,117 @@
+"""Umami analytics sync via REST API.
+
+Uses bearer token auth. Self-hosted instance, no rate limits.
+Config already exists: UMAMI_API_URL, UMAMI_API_TOKEN, UMAMI_WEBSITE_ID.
+"""
+
+from datetime import datetime, timedelta, timezone
+
+import httpx
+
+from ..core import config, execute
+
+
+_TIMEOUT_SECONDS = 15
+
+
+async def sync_umami(days_back: int = 3, timeout_seconds: int = _TIMEOUT_SECONDS) -> int:
+ """Sync Umami per-URL metrics into seo_analytics_metrics. Returns rows synced."""
+ assert 1 <= days_back <= 90, "days_back must be 1-90"
+ assert 1 <= timeout_seconds <= 120, "timeout_seconds must be 1-120"
+
+ if not config.UMAMI_API_TOKEN or not config.UMAMI_API_URL:
+ return 0 # Umami not configured — skip silently
+
+ started_at = datetime.utcnow()
+
+ try:
+ rows_synced = 0
+ headers = {"Authorization": f"Bearer {config.UMAMI_API_TOKEN}"}
+ base = config.UMAMI_API_URL.rstrip("/")
+ website_id = config.UMAMI_WEBSITE_ID
+
+ async with httpx.AsyncClient(timeout=timeout_seconds, headers=headers) as client:
+ # Fetch per-URL metrics for each day individually
+ # (Umami's metrics endpoint returns totals for the period,
+ # so we query one day at a time for daily granularity)
+ for day_offset in range(days_back):
+ day = datetime.utcnow() - timedelta(days=day_offset + 1)
+ metric_date = day.strftime("%Y-%m-%d")
+ start_ms = int(day.replace(hour=0, minute=0, second=0).timestamp() * 1000)
+ end_ms = int(day.replace(hour=23, minute=59, second=59).timestamp() * 1000)
+
+ # Get URL-level metrics
+ response = await client.get(
+ f"{base}/api/websites/{website_id}/metrics",
+ params={
+ "startAt": start_ms,
+ "endAt": end_ms,
+ "type": "url",
+ "limit": 500,
+ },
+ )
+ response.raise_for_status()
+ url_metrics = response.json()
+
+ if not isinstance(url_metrics, list):
+ continue
+
+ for entry in url_metrics:
+ page_url = entry.get("x", "")
+ pageviews = entry.get("y", 0)
+
+ if not page_url:
+ continue
+
+ await execute(
+ """INSERT OR REPLACE INTO seo_analytics_metrics
+ (metric_date, page_url, pageviews, visitors,
+ bounce_rate, time_avg_seconds)
+ VALUES (?, ?, ?, 0, NULL, NULL)""",
+ (metric_date, page_url, pageviews),
+ )
+ rows_synced += 1
+
+ # Try to get overall stats for bounce rate and visit duration
+ # (Umami doesn't provide per-URL bounce rate, only site-wide)
+ stats_response = await client.get(
+ f"{base}/api/websites/{website_id}/stats",
+ params={"startAt": start_ms, "endAt": end_ms},
+ )
+ if stats_response.status_code == 200:
+ stats = stats_response.json()
+ visitors = stats.get("visitors", {}).get("value", 0)
+ bounce_rate = stats.get("bounces", {}).get("value", 0)
+ total_time = stats.get("totaltime", {}).get("value", 0)
+ page_count = stats.get("pageviews", {}).get("value", 1) or 1
+
+ # Store site-wide stats on the root URL for the day
+ avg_time = int(total_time / max(visitors, 1))
+ br = bounce_rate / max(visitors, 1) if visitors else 0
+
+ await execute(
+ """INSERT OR REPLACE INTO seo_analytics_metrics
+ (metric_date, page_url, pageviews, visitors,
+ bounce_rate, time_avg_seconds)
+ VALUES (?, '/', ?, ?, ?, ?)""",
+ (metric_date, page_count, visitors, br, avg_time),
+ )
+
+ duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
+ await execute(
+ """INSERT INTO seo_sync_log
+ (source, status, rows_synced, started_at, completed_at, duration_ms)
+ VALUES ('umami', 'success', ?, ?, ?, ?)""",
+ (rows_synced, started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
+ )
+ return rows_synced
+
+ except Exception as exc:
+ duration_ms = int((datetime.utcnow() - started_at).total_seconds() * 1000)
+ await execute(
+ """INSERT INTO seo_sync_log
+ (source, status, rows_synced, error, started_at, completed_at, duration_ms)
+ VALUES ('umami', 'failed', 0, ?, ?, ?, ?)""",
+ (str(exc), started_at.isoformat(), datetime.utcnow().isoformat(), duration_ms),
+ )
+ raise
diff --git a/web/src/padelnomics/worker.py b/web/src/padelnomics/worker.py
index 6b8fa4b..a718af3 100644
--- a/web/src/padelnomics/worker.py
+++ b/web/src/padelnomics/worker.py
@@ -564,6 +564,45 @@ async def handle_cleanup_tasks(payload: dict) -> None:
)
+# =============================================================================
+# SEO Metrics Sync
+# =============================================================================
+
+@task("sync_gsc")
+async def handle_sync_gsc(payload: dict) -> None:
+ """Sync Google Search Console data."""
+ from .seo import sync_gsc
+ days_back = payload.get("days_back", 3)
+ rows = await sync_gsc(days_back=days_back)
+ print(f"[WORKER] GSC sync complete: {rows} rows")
+
+
+@task("sync_bing")
+async def handle_sync_bing(payload: dict) -> None:
+ """Sync Bing Webmaster data."""
+ from .seo import sync_bing
+ days_back = payload.get("days_back", 3)
+ rows = await sync_bing(days_back=days_back)
+ print(f"[WORKER] Bing sync complete: {rows} rows")
+
+
+@task("sync_umami")
+async def handle_sync_umami(payload: dict) -> None:
+ """Sync Umami analytics data."""
+ from .seo import sync_umami
+ days_back = payload.get("days_back", 3)
+ rows = await sync_umami(days_back=days_back)
+ print(f"[WORKER] Umami sync complete: {rows} rows")
+
+
+@task("cleanup_seo_metrics")
+async def handle_cleanup_seo_metrics(payload: dict) -> None:
+ """Delete SEO metrics older than 12 months."""
+ from .seo import cleanup_old_metrics
+ deleted = await cleanup_old_metrics(retention_days=365)
+ print(f"[WORKER] Cleaned up {deleted} old SEO metric rows")
+
+
# =============================================================================
# Worker Loop
# =============================================================================
@@ -616,6 +655,7 @@ async def run_scheduler() -> None:
await init_db()
last_credit_refill = None
+ last_seo_sync_date = None
while True:
try:
@@ -633,8 +673,19 @@ async def run_scheduler() -> None:
last_credit_refill = this_month
print(f"[SCHEDULER] Queued monthly credit refill for {this_month}")
+ # Daily SEO metrics sync — run once per day after 6am UTC
+ # (GSC data has ~2 day delay, syncing at 6am ensures data is ready)
+ today_date = today.strftime("%Y-%m-%d")
+ if last_seo_sync_date != today_date and today.hour >= 6:
+ await enqueue("sync_gsc")
+ await enqueue("sync_bing")
+ await enqueue("sync_umami")
+ await enqueue("cleanup_seo_metrics")
+ last_seo_sync_date = today_date
+ print(f"[SCHEDULER] Queued SEO metric syncs for {today_date}")
+
await asyncio.sleep(3600) # 1 hour
-
+
except Exception as e:
print(f"[SCHEDULER] Error: {e}")
await asyncio.sleep(60)