"""Tests for the SEO metrics module: queries, sync functions, admin routes.

The query tests run against sample rows inserted by the ``seo_data`` and
``articles_data`` fixtures; the sync tests replace the HTTP client with mocks;
the admin route tests use an authenticated test client.
"""
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from padelnomics.seo._queries import (
    cleanup_old_metrics,
    get_article_scorecard,
    get_country_breakdown,
    get_device_breakdown,
    get_funnel_metrics,
    get_search_performance,
    get_sync_status,
    get_top_pages,
    get_top_queries,
)

from padelnomics import core

# ── Fixtures ──────────────────────────────────────────────────


def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Produces the same value as ``datetime.utcnow()`` (deprecated since
    Python 3.12) without triggering a ``DeprecationWarning``.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _today() -> str:
    """Return today's UTC date formatted as ``YYYY-MM-DD``."""
    return _utcnow().strftime("%Y-%m-%d")


def _days_ago(n: int) -> str:
    """Return the UTC date *n* days before today formatted as ``YYYY-MM-DD``."""
    return (_utcnow() - timedelta(days=n)).strftime("%Y-%m-%d")


def _mock_json_response(payload):
    """Build a mock HTTP response with status 200 whose ``json()`` returns *payload*."""
    response = MagicMock()
    response.status_code = 200
    response.json.return_value = payload
    response.raise_for_status = MagicMock()
    return response


@pytest.fixture
async def seo_data(db):
    """Populate seo_search_metrics and seo_analytics_metrics with sample data."""
    today = _today()
    yesterday = _days_ago(1)

    # Row layout: (source, metric_date, page_url, query, country, device,
    #              clicks, impressions, ctr, position_avg)
    rows = [
        # GSC search data
        ("gsc", today, "/en/markets/germany/berlin", "padel berlin", "de", "mobile", 50, 500, 0.10, 5.2),
        ("gsc", today, "/en/markets/germany/munich", "padel munich", "de", "desktop", 30, 300, 0.10, 8.1),
        ("gsc", today, "/en/markets/germany/berlin", "padel court cost", "de", "desktop", 10, 200, 0.05, 12.0),
        ("gsc", yesterday, "/en/markets/germany/berlin", "padel berlin", "de", "mobile", 45, 480, 0.09, 5.5),
        # Bing data
        ("bing", today, "/", "padel business plan", None, None, 5, 100, 0.05, 15.0),
    ]
    for source, d, page, query, country, device, clicks, imp, ctr, pos in rows:
        await db.execute(
            """INSERT INTO seo_search_metrics
               (source, metric_date, page_url, query, country, device,
                clicks, impressions, ctr, position_avg)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (source, d, page, query, country, device, clicks, imp, ctr, pos),
        )

    # Umami analytics data
    # Row layout: (metric_date, page_url, pageviews, visitors,
    #              bounce_rate, time_avg_seconds)
    analytics_rows = [
        (today, "/en/markets/germany/berlin", 120, 80, 0.35, 45),
        (today, "/en/markets/germany/munich", 60, 40, 0.40, 30),
        (today, "/", 200, 150, 0.50, 20),
    ]
    for d, page, pv, vis, br, t in analytics_rows:
        await db.execute(
            """INSERT INTO seo_analytics_metrics
               (metric_date, page_url, pageviews, visitors, bounce_rate, time_avg_seconds)
               VALUES (?, ?, ?, ?, ?, ?)""",
            (d, page, pv, vis, br, t),
        )
    await db.commit()


@pytest.fixture
async def articles_data(db, seo_data):
    """Create articles that match the SEO data URLs."""
    now = _utcnow().isoformat()
    pub = _days_ago(10)

    for title, url, tpl, lang in [
        ("Padel in Berlin", "/en/markets/germany/berlin", "city-cost-de", "en"),
        ("Padel in Munich", "/en/markets/germany/munich", "city-cost-de", "en"),
    ]:
        # The slug is the last path segment of the article URL.
        await db.execute(
            """INSERT INTO articles
               (url_path, slug, title, template_slug, language, status, published_at, created_at)
               VALUES (?, ?, ?, ?, ?, 'published', ?, ?)""",
            (url, url.split("/")[-1], title, tpl, lang, pub, now),
        )
    await db.commit()


@pytest.fixture
async def admin_client(app, db):
    """Authenticated admin client.

    Inserts a user with the ``admin`` role and yields a test client whose
    session already carries that user's id.
    """
    now = _utcnow().isoformat()
    async with db.execute(
        "INSERT INTO users (email, name, created_at) VALUES (?, ?, ?)",
        ("admin@test.com", "Admin", now),
    ) as cursor:
        admin_id = cursor.lastrowid
    await db.execute(
        "INSERT INTO user_roles (user_id, role) VALUES (?, 'admin')", (admin_id,)
    )
    await db.commit()

    async with app.test_client() as c:
        async with c.session_transaction() as sess:
            sess["user_id"] = admin_id
        yield c


# ── Query function tests ─────────────────────────────────────


class TestSearchPerformance:
    """Tests for get_search_performance()."""

    async def test_returns_aggregate_metrics(self, db, seo_data):
        result = await get_search_performance(date_range_days=28)
        assert result["total_clicks"] > 0
        assert result["total_impressions"] > 0
        assert 0 < result["avg_ctr"] < 1
        assert result["avg_position"] > 0

    async def test_filter_by_source(self, db, seo_data):
        gsc = await get_search_performance(date_range_days=28, source="gsc")
        bing = await get_search_performance(date_range_days=28, source="bing")
        combined = await get_search_performance(date_range_days=28)
        # The fixture only contains gsc and bing rows, so the two filtered
        # totals must add up to the unfiltered total.
        assert gsc["total_clicks"] + bing["total_clicks"] == combined["total_clicks"]

    async def test_empty_data(self, db):
        result = await get_search_performance(date_range_days=28)
        assert result["total_clicks"] == 0
        assert result["total_impressions"] == 0

    async def test_date_range_filter(self, db, seo_data):
        # A narrower window can never report more clicks than a wider one.
        # The comparison is deliberately non-strict: it does not assume
        # whether yesterday's row falls inside the 1-day window.
        result = await get_search_performance(date_range_days=1)
        full = await get_search_performance(date_range_days=28)
        assert result["total_clicks"] <= full["total_clicks"]


class TestTopQueries:
    """Tests for get_top_queries()."""

    async def test_returns_queries_sorted_by_impressions(self, db, seo_data):
        queries = await get_top_queries(date_range_days=28)
        assert len(queries) > 0
        # Should be sorted desc by impressions: compare each adjacent pair.
        for current, following in zip(queries, queries[1:]):
            assert current["impressions"] >= following["impressions"]

    async def test_limit(self, db, seo_data):
        queries = await get_top_queries(date_range_days=28, limit=2)
        assert len(queries) <= 2

    async def test_filter_by_source(self, db, seo_data):
        gsc_queries = await get_top_queries(source="gsc")
        for q in gsc_queries:
            assert q["query"] != "padel business plan"  # that's bing data


class TestTopPages:
    """Tests for get_top_pages()."""

    async def test_returns_pages(self, db, seo_data):
        pages = await get_top_pages(date_range_days=28)
        assert len(pages) > 0
        # Berlin page should be first (most impressions)
        assert pages[0]["page_url"] == "/en/markets/germany/berlin"


class TestCountryBreakdown:
    """Tests for get_country_breakdown()."""

    async def test_returns_countries(self, db, seo_data):
        countries = await get_country_breakdown(date_range_days=28)
        assert len(countries) > 0
        assert any(c["country"] == "de" for c in countries)


class TestDeviceBreakdown:
    """Tests for get_device_breakdown()."""

    async def test_returns_devices_gsc_only(self, db, seo_data):
        devices = await get_device_breakdown(date_range_days=28)
        assert len(devices) > 0
        device_names = [d["device"] for d in devices]
        assert "mobile" in device_names
        assert "desktop" in device_names


class TestFunnelMetrics:
    """Tests for get_funnel_metrics()."""

    async def test_returns_all_stages(self, db, seo_data):
        funnel = await get_funnel_metrics(date_range_days=28)
        assert "impressions" in funnel
        assert "clicks" in funnel
        assert "pageviews" in funnel
        assert "visitors" in funnel
        assert "planner_users" in funnel
        assert "leads" in funnel

    async def test_conversion_rates(self, db, seo_data):
        funnel = await get_funnel_metrics(date_range_days=28)
        assert funnel["ctr"] > 0  # We have clicks and impressions
        assert 0 <= funnel["ctr"] <= 1

    async def test_empty_data(self, db):
        funnel = await get_funnel_metrics(date_range_days=28)
        assert funnel["impressions"] == 0
        assert funnel["planner_users"] == 0


class TestArticleScorecard:
    """Tests for get_article_scorecard()."""

    async def test_joins_articles_with_metrics(self, db, articles_data):
        scorecard = await get_article_scorecard(date_range_days=28)
        assert len(scorecard) == 2
        berlin = next(a for a in scorecard if "berlin" in a["url_path"])
        assert berlin["impressions"] > 0
        assert berlin["pageviews"] > 0

    async def test_filter_by_template(self, db, articles_data):
        scorecard = await get_article_scorecard(
            date_range_days=28,
            template_slug="city-cost-de",
        )
        assert len(scorecard) == 2
        for a in scorecard:
            assert a["template_slug"] == "city-cost-de"

    async def test_sort_by_clicks(self, db, articles_data):
        scorecard = await get_article_scorecard(
            date_range_days=28,
            sort_by="clicks",
            sort_dir="desc",
        )
        if len(scorecard) >= 2:
            assert scorecard[0]["clicks"] >= scorecard[1]["clicks"]

    async def test_attention_flags(self, db, articles_data):
        """Scorecard rows expose the attention-flag fields."""
        scorecard = await get_article_scorecard(date_range_days=28)
        berlin = next(a for a in scorecard if "berlin" in a["url_path"])
        # Berlin: 1180 impressions total, 105 clicks → CTR ~8.9% → no flag
        # Flags depend on actual data; just check fields exist
        assert "flag_low_ctr" in berlin
        assert "flag_no_clicks" in berlin

    async def test_invalid_sort_defaults_to_impressions(self, db, articles_data):
        scorecard = await get_article_scorecard(
            date_range_days=28,
            sort_by="invalid_column",
        )
        # Should not crash — falls back to impressions. The assertion itself
        # is always true; the test passes as long as the call does not raise.
        assert len(scorecard) >= 0


class TestSyncStatus:
    """Tests for get_sync_status()."""

    async def test_returns_last_sync_per_source(self, db):
        now = _utcnow().isoformat()
        await db.execute(
            """INSERT INTO seo_sync_log
               (source, status, rows_synced, started_at, completed_at, duration_ms)
               VALUES ('gsc', 'success', 100, ?, ?, 500)""",
            (now, now),
        )
        await db.execute(
            """INSERT INTO seo_sync_log
               (source, status, rows_synced, started_at, completed_at, duration_ms)
               VALUES ('umami', 'failed', 0, ?, ?, 200)""",
            (now, now),
        )
        await db.commit()

        status = await get_sync_status()
        assert len(status) == 2
        sources = {s["source"] for s in status}
        assert "gsc" in sources
        assert "umami" in sources

    async def test_empty_when_no_syncs(self, db):
        status = await get_sync_status()
        assert status == []


class TestCleanupOldMetrics:
    """Tests for cleanup_old_metrics()."""

    async def test_deletes_old_data(self, db):
        # One row older than the 365-day retention window, one from today.
        old_date = _days_ago(400)
        recent_date = _today()

        await db.execute(
            """INSERT INTO seo_search_metrics
               (source, metric_date, page_url, clicks, impressions)
               VALUES ('gsc', ?, '/old', 1, 10)""",
            (old_date,),
        )
        await db.execute(
            """INSERT INTO seo_search_metrics
               (source, metric_date, page_url, clicks, impressions)
               VALUES ('gsc', ?, '/recent', 1, 10)""",
            (recent_date,),
        )
        await db.commit()

        deleted = await cleanup_old_metrics(retention_days=365)
        assert deleted >= 1

        rows = await core.fetch_all("SELECT * FROM seo_search_metrics")
        assert len(rows) == 1
        assert rows[0]["page_url"] == "/recent"


# ── Sync function tests (mocked HTTP) ────────────────────────


class TestSyncUmami:
    """Tests for sync_umami() with mocked HTTP."""

    async def test_skips_when_not_configured(self, db):
        original = core.config.UMAMI_API_TOKEN
        core.config.UMAMI_API_TOKEN = ""
        try:
            from padelnomics.seo._umami import sync_umami

            result = await sync_umami(days_back=1)
            assert result == 0
        finally:
            core.config.UMAMI_API_TOKEN = original

    async def test_syncs_url_metrics(self, db):
        from padelnomics.seo._umami import sync_umami

        mock_metrics = [
            {"x": "/en/markets/germany/berlin", "y": 50},
            {"x": "/en/markets/germany/munich", "y": 30},
        ]
        mock_stats = {
            "visitors": {"value": 100},
            "bounces": {"value": 30},
            "totaltime": {"value": 5000},
            "pageviews": {"value": 200},
        }
        mock_response_metrics = _mock_json_response(mock_metrics)
        mock_response_stats = _mock_json_response(mock_stats)

        async def mock_get(url, **kwargs):
            # URLs containing "/metrics" get the per-URL payload; every other
            # request gets the stats payload.
            if "/metrics" in url:
                return mock_response_metrics
            return mock_response_stats

        mock_client = AsyncMock()
        mock_client.get = mock_get
        # Make the mock usable as ``async with httpx.AsyncClient(...) as client``.
        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
        mock_client.__aexit__ = AsyncMock(return_value=None)

        # Save and restore the config values so this test does not leak its
        # fake credentials into tests that run afterwards.
        original_token = core.config.UMAMI_API_TOKEN
        original_url = core.config.UMAMI_API_URL
        core.config.UMAMI_API_TOKEN = "test-token"
        core.config.UMAMI_API_URL = "https://umami.test.io"
        try:
            with patch("padelnomics.seo._umami.httpx.AsyncClient", return_value=mock_client):
                result = await sync_umami(days_back=1)
        finally:
            core.config.UMAMI_API_TOKEN = original_token
            core.config.UMAMI_API_URL = original_url

        assert result == 2  # 2 URL metrics

        # Verify data stored
        rows = await core.fetch_all(
            "SELECT * FROM seo_analytics_metrics WHERE page_url != '/'"
        )
        assert len(rows) == 2

        # Verify sync log
        log = await core.fetch_all("SELECT * FROM seo_sync_log WHERE source = 'umami'")
        assert len(log) == 1
        assert log[0]["status"] == "success"


class TestSyncBing:
    """Tests for sync_bing() with mocked HTTP."""

    async def test_skips_when_not_configured(self, db):
        original_key = core.config.BING_WEBMASTER_API_KEY
        core.config.BING_WEBMASTER_API_KEY = ""
        try:
            from padelnomics.seo._bing import sync_bing

            result = await sync_bing(days_back=1)
            assert result == 0
        finally:
            core.config.BING_WEBMASTER_API_KEY = original_key


class TestSyncGsc:
    """Tests for sync_gsc() with mocked Google API."""

    async def test_skips_when_not_configured(self, db):
        original = core.config.GSC_SERVICE_ACCOUNT_PATH
        core.config.GSC_SERVICE_ACCOUNT_PATH = ""
        try:
            from padelnomics.seo._gsc import sync_gsc

            result = await sync_gsc(days_back=1)
            assert result == 0
        finally:
            core.config.GSC_SERVICE_ACCOUNT_PATH = original


# ── Admin route tests ─────────────────────────────────────────


class TestSeoAdminRoutes:
    """Tests for the SEO hub admin routes."""

    async def test_seo_hub_loads(self, admin_client, db):
        resp = await admin_client.get("/admin/seo")
        assert resp.status_code == 200
        text = await resp.get_data(as_text=True)
        assert "SEO" in text

    async def test_seo_hub_with_data(self, admin_client, db, seo_data):
        resp = await admin_client.get("/admin/seo?days=28")
        assert resp.status_code == 200

    async def test_seo_search_partial(self, admin_client, db, seo_data):
        resp = await admin_client.get("/admin/seo/search?days=28")
        assert resp.status_code == 200
        text = await resp.get_data(as_text=True)
        assert "Top Queries" in text

    async def test_seo_search_filter_by_source(self, admin_client, db, seo_data):
        resp = await admin_client.get("/admin/seo/search?days=28&source=gsc")
        assert resp.status_code == 200

    async def test_seo_funnel_partial(self, admin_client, db, seo_data):
        resp = await admin_client.get("/admin/seo/funnel?days=28")
        assert resp.status_code == 200
        text = await resp.get_data(as_text=True)
        assert "Impressions" in text

    async def test_seo_scorecard_partial(self, admin_client, db, articles_data):
        resp = await admin_client.get("/admin/seo/scorecard?days=28")
        assert resp.status_code == 200
        text = await resp.get_data(as_text=True)
        assert "Berlin" in text or "scorecard" in text.lower() or "articles" in text.lower()

    async def test_seo_scorecard_filter(self, admin_client, db, articles_data):
        resp = await admin_client.get(
            "/admin/seo/scorecard?days=28&template_slug=city-cost-de&sort=clicks&dir=desc"
        )
        assert resp.status_code == 200

    async def test_seo_sync_requires_auth(self, client, db):
        resp = await client.post("/admin/seo/sync")
        # Should redirect to login (302) or return 403
        assert resp.status_code in (302, 403)

    async def test_seo_sync_now(self, admin_client, db):
        """Sync Now enqueues tasks."""
        # Seed the session with the same CSRF token that the form submits.
        async with admin_client.session_transaction() as sess:
            sess["csrf_token"] = "test"

        resp = await admin_client.post(
            "/admin/seo/sync",
            form={"source": "all", "csrf_token": "test"},
        )
        # Should redirect back to SEO hub
        assert resp.status_code == 302

        # Verify tasks enqueued
        tasks = await core.fetch_all(
            "SELECT task_name FROM tasks WHERE task_name LIKE 'sync_%'"
        )
        task_names = {t["task_name"] for t in tasks}
        assert "sync_gsc" in task_names
        assert "sync_bing" in task_names
        assert "sync_umami" in task_names

    async def test_seo_sync_single_source(self, admin_client, db):
        # Seed the session with the same CSRF token that the form submits.
        async with admin_client.session_transaction() as sess:
            sess["csrf_token"] = "test"

        resp = await admin_client.post(
            "/admin/seo/sync",
            form={"source": "gsc", "csrf_token": "test"},
        )
        assert resp.status_code == 302

        tasks = await core.fetch_all("SELECT task_name FROM tasks WHERE task_name = 'sync_gsc'")
        assert len(tasks) == 1

    async def test_seo_hub_date_range(self, admin_client, db, seo_data):
        for days in [7, 28, 90, 365]:
            resp = await admin_client.get(f"/admin/seo?days={days}")
            assert resp.status_code == 200

    async def test_seo_sidebar_link(self, admin_client, db):
        resp = await admin_client.get("/admin/")
        text = await resp.get_data(as_text=True)
        assert "SEO Hub" in text


# ── Assertion boundary tests ─────────────────────────────────


class TestQueryBounds:
    """Test that query functions validate their bounds."""

    async def test_search_performance_rejects_zero_days(self, db):
        with pytest.raises(AssertionError):
            await get_search_performance(date_range_days=0)

    async def test_top_queries_rejects_zero_limit(self, db):
        with pytest.raises(AssertionError):
            await get_top_queries(limit=0)

    async def test_cleanup_rejects_short_retention(self, db):
        with pytest.raises(AssertionError):
            await cleanup_old_metrics(retention_days=7)

    async def test_scorecard_rejects_invalid_sort_dir(self, db):
        with pytest.raises(AssertionError):
            await get_article_scorecard(sort_dir="invalid")