""" Tests for the pSEO Engine: health checks, content gaps, freshness, and admin routes. Covers: - content/health.py: get_template_stats, get_template_freshness, get_content_gaps, check_hreflang_orphans, check_missing_build_files, check_broken_scenario_refs, get_all_health_issues - admin/pseo_routes.py: all 6 routes (dashboard, health, gaps, generate, jobs, job status) """ import json from unittest.mock import patch import pytest from padelnomics.content.health import ( check_broken_scenario_refs, check_hreflang_orphans, check_missing_build_files, get_all_health_issues, get_content_gaps, get_template_freshness, get_template_stats, ) from padelnomics.core import execute, utcnow_iso from padelnomics import core # ── Fixtures ────────────────────────────────────────────────────────────────── @pytest.fixture async def admin_client(app, db): """Authenticated admin test client.""" now = utcnow_iso() async with db.execute( "INSERT INTO users (email, name, created_at) VALUES (?, ?, ?)", ("pseo-admin@test.com", "pSEO Admin", now), ) as cursor: admin_id = cursor.lastrowid await db.execute( "INSERT INTO user_roles (user_id, role) VALUES (?, 'admin')", (admin_id,) ) await db.commit() async with app.test_client() as c: async with c.session_transaction() as sess: sess["user_id"] = admin_id yield c # ── DB helpers ──────────────────────────────────────────────────────────────── async def _insert_article( slug, url_path, status="published", language="en", template_slug="city-cost-de", created_at=None, ): """Insert a minimal article row and return its id.""" ts = created_at or utcnow_iso() return await execute( """INSERT INTO articles (url_path, slug, title, meta_description, country, region, status, published_at, language, template_slug, created_at, updated_at) VALUES (?, ?, ?, ?, 'DE', 'Europe', ?, ?, ?, ?, ?, ?)""", ( url_path, slug, f"Title {slug}", f"Desc {slug}", status, ts if status == "published" else None, language, template_slug, ts, ts, ), ) async def _insert_scenario(slug="test-scenario"): """Insert a minimal published_scenario row.""" from padelnomics.planner.calculator import calc, validate_state state = validate_state({"dblCourts": 2}) d = calc(state) return await execute( """INSERT INTO published_scenarios (slug, title, subtitle, location, country, venue_type, ownership, court_config, state_json, calc_json) VALUES (?, ?, '', 'TestCity', 'TC', 'indoor', 'rent', '2 double', ?, ?)""", (slug, f"Scenario {slug}", json.dumps(state), json.dumps(d)), ) async def _insert_task(status="pending", progress_current=0, progress_total=0): """Insert a generate_articles task row and return its id.""" now = utcnow_iso() async with core._db.execute( """INSERT INTO tasks (task_name, payload, status, run_at, progress_current, progress_total, created_at) VALUES ('generate_articles', '{}', ?, ?, ?, ?, ?)""", (status, now, progress_current, progress_total, now), ) as cursor: task_id = cursor.lastrowid await core._db.commit() return task_id # ── DuckDB mock rows ────────────────────────────────────────────────────────── _DUCKDB_ROWS = [ {"city_slug": "berlin", "city": "Berlin", "country": "DE"}, {"city_slug": "munich", "city": "Munich", "country": "DE"}, {"city_slug": "hamburg", "city": "Hamburg", "country": "DE"}, ] async def _mock_fetch_duckdb(query, params=None): return _DUCKDB_ROWS # ════════════════════════════════════════════════════════════════════════════ # get_template_stats() # ════════════════════════════════════════════════════════════════════════════ class TestGetTemplateStats: async def test_empty_db_returns_zeros(self, db): stats = await get_template_stats("city-cost-de") assert stats["total"] == 0 assert stats["published"] == 0 assert stats["draft"] == 0 assert stats["by_language"] == {} async def test_counts_per_status(self, db): await _insert_article("city-cost-de-en-berlin", "/en/markets/germany/berlin", status="published", language="en") await _insert_article("city-cost-de-en-munich", "/en/markets/germany/munich", status="draft", language="en") await _insert_article("city-cost-de-de-berlin", "/de/markets/germany/berlin", status="published", language="de") stats = await get_template_stats("city-cost-de") assert stats["total"] == 3 assert stats["published"] == 2 assert stats["draft"] == 1 assert stats["by_language"]["en"]["total"] == 2 assert stats["by_language"]["de"]["total"] == 1 async def test_ignores_other_templates(self, db): await _insert_article("other-en-berlin", "/en/other/berlin", template_slug="other") stats = await get_template_stats("city-cost-de") assert stats["total"] == 0 # ════════════════════════════════════════════════════════════════════════════ # get_template_freshness() # ════════════════════════════════════════════════════════════════════════════ _SAMPLE_TEMPLATES = [ { "slug": "city-cost-de", "name": "City Cost DE", "data_table": "serving.pseo_city_costs_de", "languages": ["en", "de"], } ] class TestGetTemplateFreshness: async def test_no_meta_file_returns_no_data(self, db, monkeypatch): import padelnomics.content.health as health_mod monkeypatch.setattr(health_mod, "_read_serving_meta", lambda: {}) result = await get_template_freshness(_SAMPLE_TEMPLATES) assert len(result) == 1 assert result[0]["status"] == "no_data" async def test_meta_present_no_articles_returns_no_articles(self, db, monkeypatch): import padelnomics.content.health as health_mod monkeypatch.setattr(health_mod, "_read_serving_meta", lambda: { "exported_at_utc": "2026-01-15T10:00:00+00:00", "tables": {"pseo_city_costs_de": {"row_count": 100}}, }) result = await get_template_freshness(_SAMPLE_TEMPLATES) assert result[0]["status"] == "no_articles" assert result[0]["row_count"] == 100 async def test_article_older_than_export_returns_stale(self, db, monkeypatch): import padelnomics.content.health as health_mod # Article created Jan 10, data exported Jan 15 → stale await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", status="published", language="en", created_at="2026-01-10T08:00:00", ) monkeypatch.setattr(health_mod, "_read_serving_meta", lambda: { "exported_at_utc": "2026-01-15T10:00:00+00:00", "tables": {"pseo_city_costs_de": {"row_count": 100}}, }) result = await get_template_freshness(_SAMPLE_TEMPLATES) assert result[0]["status"] == "stale" async def test_article_newer_than_export_returns_fresh(self, db, monkeypatch): import padelnomics.content.health as health_mod # Data exported Jan 10, article updated Jan 15 → fresh await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", status="published", language="en", created_at="2026-01-15T12:00:00", ) monkeypatch.setattr(health_mod, "_read_serving_meta", lambda: { "exported_at_utc": "2026-01-10T10:00:00+00:00", "tables": {}, }) result = await get_template_freshness(_SAMPLE_TEMPLATES) assert result[0]["status"] == "fresh" # ════════════════════════════════════════════════════════════════════════════ # get_content_gaps() # ════════════════════════════════════════════════════════════════════════════ class TestGetContentGaps: async def test_no_articles_returns_all_duckdb_rows(self, db, monkeypatch): import padelnomics.content.health as health_mod monkeypatch.setattr(health_mod, "fetch_analytics", _mock_fetch_duckdb) gaps = await get_content_gaps( template_slug="city-cost-de", data_table="serving.pseo_city_costs_de", natural_key="city_slug", languages=["en"], ) assert len(gaps) == len(_DUCKDB_ROWS) assert all(g["_missing_languages"] == ["en"] for g in gaps) async def test_existing_article_excluded_from_gaps(self, db, monkeypatch): import padelnomics.content.health as health_mod monkeypatch.setattr(health_mod, "fetch_analytics", _mock_fetch_duckdb) await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", ) gaps = await get_content_gaps( template_slug="city-cost-de", data_table="serving.pseo_city_costs_de", natural_key="city_slug", languages=["en"], ) gap_keys = {g["_natural_key"] for g in gaps} assert "berlin" not in gap_keys assert "munich" in gap_keys assert "hamburg" in gap_keys async def test_partial_language_gap_detected(self, db, monkeypatch): import padelnomics.content.health as health_mod monkeypatch.setattr(health_mod, "fetch_analytics", _mock_fetch_duckdb) # EN exists for berlin, DE is missing → berlin has a gap for "de" await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", ) gaps = await get_content_gaps( template_slug="city-cost-de", data_table="serving.pseo_city_costs_de", natural_key="city_slug", languages=["en", "de"], ) berlin = next((g for g in gaps if g["_natural_key"] == "berlin"), None) assert berlin is not None assert berlin["_missing_languages"] == ["de"] async def test_no_gaps_when_all_articles_exist(self, db, monkeypatch): import padelnomics.content.health as health_mod monkeypatch.setattr(health_mod, "fetch_analytics", _mock_fetch_duckdb) for key in ("berlin", "munich", "hamburg"): await _insert_article( f"city-cost-de-en-{key}", f"/en/markets/germany/{key}", language="en", ) gaps = await get_content_gaps( template_slug="city-cost-de", data_table="serving.pseo_city_costs_de", natural_key="city_slug", languages=["en"], ) assert gaps == [] # ════════════════════════════════════════════════════════════════════════════ # check_hreflang_orphans() # ════════════════════════════════════════════════════════════════════════════ class TestCheckHreflangOrphans: async def test_single_lang_template_no_orphans(self, db): templates = [{"slug": "city-cost-de", "name": "City Cost DE", "languages": ["en"]}] await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="published", ) orphans = await check_hreflang_orphans(templates) assert orphans == [] async def test_bilingual_both_present_no_orphans(self, db): templates = [{"slug": "city-cost-de", "name": "City Cost DE", "languages": ["en", "de"]}] await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="published", ) await _insert_article( "city-cost-de-de-berlin", "/de/markets/germany/berlin", language="de", status="published", ) orphans = await check_hreflang_orphans(templates) assert orphans == [] async def test_missing_de_sibling_detected(self, db): templates = [{"slug": "city-cost-de", "name": "City Cost DE", "languages": ["en", "de"]}] # Only EN for berlin — DE is missing await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="published", ) orphans = await check_hreflang_orphans(templates) assert len(orphans) == 1 assert orphans[0]["template_slug"] == "city-cost-de" assert "de" in orphans[0]["missing_languages"] assert "en" in orphans[0]["present_languages"] async def test_draft_articles_not_counted(self, db): templates = [{"slug": "city-cost-de", "name": "City Cost DE", "languages": ["en", "de"]}] # Draft articles should be ignored await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="draft", ) orphans = await check_hreflang_orphans(templates) assert orphans == [] # ════════════════════════════════════════════════════════════════════════════ # check_missing_build_files() # ════════════════════════════════════════════════════════════════════════════ class TestCheckMissingBuildFiles: async def test_no_articles_returns_empty(self, db, tmp_path): result = await check_missing_build_files(build_dir=tmp_path) assert result == [] async def test_build_file_present_not_reported(self, db, tmp_path): await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="published", ) build_file = tmp_path / "en" / "city-cost-de-en-berlin.html" build_file.parent.mkdir(parents=True) build_file.write_text("

Berlin

") result = await check_missing_build_files(build_dir=tmp_path) assert result == [] async def test_missing_build_file_reported(self, db, tmp_path): await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="published", ) # No build file created result = await check_missing_build_files(build_dir=tmp_path) assert len(result) == 1 assert result[0]["slug"] == "city-cost-de-en-berlin" assert result[0]["language"] == "en" async def test_draft_articles_ignored(self, db, tmp_path): await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="draft", ) result = await check_missing_build_files(build_dir=tmp_path) assert result == [] # ════════════════════════════════════════════════════════════════════════════ # check_broken_scenario_refs() # ════════════════════════════════════════════════════════════════════════════ class TestCheckBrokenScenarioRefs: async def test_no_markdown_files_returns_empty(self, db, tmp_path): await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="published", ) result = await check_broken_scenario_refs(build_dir=tmp_path) assert result == [] async def test_valid_scenario_ref_not_reported(self, db, tmp_path): await _insert_scenario("berlin-scenario") await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="published", ) md_dir = tmp_path / "en" / "md" md_dir.mkdir(parents=True) (md_dir / "city-cost-de-en-berlin.md").write_text( "# Berlin\n\n[scenario:berlin-scenario:capex]\n" ) result = await check_broken_scenario_refs(build_dir=tmp_path) assert result == [] async def test_missing_scenario_ref_reported(self, db, tmp_path): # No scenario in DB, but markdown references one await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="published", ) md_dir = tmp_path / "en" / "md" md_dir.mkdir(parents=True) (md_dir / "city-cost-de-en-berlin.md").write_text( "# Berlin\n\n[scenario:ghost-scenario:capex]\n" ) result = await check_broken_scenario_refs(build_dir=tmp_path) assert len(result) == 1 assert "ghost-scenario" in result[0]["broken_scenario_refs"] async def test_no_template_slug_articles_ignored(self, db, tmp_path): # Legacy article (no template_slug) should not be checked await execute( """INSERT INTO articles (url_path, slug, title, status, language, created_at) VALUES ('/en/legacy', 'legacy', 'Legacy', 'published', 'en', ?)""", (utcnow_iso(),), ) md_dir = tmp_path / "en" / "md" md_dir.mkdir(parents=True) (md_dir / "legacy.md").write_text("# Legacy\n\n[scenario:ghost]\n") result = await check_broken_scenario_refs(build_dir=tmp_path) assert result == [] # ════════════════════════════════════════════════════════════════════════════ # get_all_health_issues() # ════════════════════════════════════════════════════════════════════════════ class TestGetAllHealthIssues: async def test_clean_state_returns_zero_counts(self, db, tmp_path): templates = [{"slug": "city-cost-de", "name": "City Cost DE", "languages": ["en"]}] result = await get_all_health_issues(templates, build_dir=tmp_path) assert result["counts"]["total"] == 0 assert result["counts"]["hreflang_orphans"] == 0 assert result["counts"]["missing_build_files"] == 0 assert result["counts"]["broken_scenario_refs"] == 0 assert "hreflang_orphans" in result assert "missing_build_files" in result assert "broken_scenario_refs" in result async def test_orphan_counted_in_total(self, db, tmp_path): templates = [{"slug": "city-cost-de", "name": "City Cost DE", "languages": ["en", "de"]}] # EN article with no DE sibling → orphan await _insert_article( "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en", status="published", ) result = await get_all_health_issues(templates, build_dir=tmp_path) assert result["counts"]["hreflang_orphans"] == 1 assert result["counts"]["total"] >= 1 # ════════════════════════════════════════════════════════════════════════════ # pSEO Route tests # ════════════════════════════════════════════════════════════════════════════ # Mock objects for route tests — avoids needing a live DuckDB _MOCK_TEMPLATE_CFG = { "slug": "city-cost-de", "name": "City Cost DE", "data_table": "serving.pseo_city_costs_de", "natural_key": "city_slug", "languages": ["en", "de"], "url_pattern": "/markets/{country}/{city_slug}", } _MOCK_TEMPLATES = [_MOCK_TEMPLATE_CFG] def _discover_mock(): return _MOCK_TEMPLATES def _load_template_mock(slug): if slug == "city-cost-de": return _MOCK_TEMPLATE_CFG raise FileNotFoundError(f"Template {slug!r} not found") async def _freshness_mock(templates): return [ { "slug": t["slug"], "name": t["name"], "data_table": t["data_table"], "status": "fresh", "exported_at_utc": None, "last_generated": None, "row_count": 100, } for t in templates ] async def _stats_mock(slug): return { "total": 10, "published": 8, "draft": 2, "scheduled": 0, "by_language": { "en": {"total": 5, "published": 4, "draft": 1, "scheduled": 0}, "de": {"total": 5, "published": 4, "draft": 1, "scheduled": 0}, }, } async def _health_mock(templates, build_dir=None): return { "hreflang_orphans": [], "missing_build_files": [], "broken_scenario_refs": [], "counts": {"hreflang_orphans": 0, "missing_build_files": 0, "broken_scenario_refs": 0, "total": 0}, } async def _gaps_empty_mock(template_slug, data_table, natural_key, languages, limit=200): return [] async def _gaps_two_mock(template_slug, data_table, natural_key, languages, limit=200): return [ {"city_slug": "munich", "_natural_key": "munich", "_missing_languages": ["en"]}, {"city_slug": "hamburg", "_natural_key": "hamburg", "_missing_languages": ["de"]}, ] class TestPseoRoutes: """Tests for all pSEO Engine admin blueprint routes.""" # -- Access control -------------------------------------------------------- async def test_dashboard_requires_admin(self, client, db): resp = await client.get("/admin/pseo/") assert resp.status_code in (302, 403) async def test_health_requires_admin(self, client, db): resp = await client.get("/admin/pseo/health") assert resp.status_code in (302, 403) async def test_gaps_requires_admin(self, client, db): resp = await client.get("/admin/pseo/gaps/city-cost-de") assert resp.status_code in (302, 403) async def test_jobs_requires_admin(self, client, db): resp = await client.get("/admin/pseo/jobs") assert resp.status_code in (302, 403) # -- Dashboard ------------------------------------------------------------- async def test_dashboard_renders(self, admin_client, db): with ( patch("padelnomics.admin.pseo_routes.discover_templates", _discover_mock), patch("padelnomics.admin.pseo_routes.get_template_freshness", _freshness_mock), patch("padelnomics.admin.pseo_routes.get_template_stats", _stats_mock), ): resp = await admin_client.get("/admin/pseo/") assert resp.status_code == 200 text = await resp.get_data(as_text=True) assert "pSEO Engine" in text async def test_dashboard_shows_template_name(self, admin_client, db): with ( patch("padelnomics.admin.pseo_routes.discover_templates", _discover_mock), patch("padelnomics.admin.pseo_routes.get_template_freshness", _freshness_mock), patch("padelnomics.admin.pseo_routes.get_template_stats", _stats_mock), ): resp = await admin_client.get("/admin/pseo/") text = await resp.get_data(as_text=True) assert "City Cost DE" in text # -- Health HTMX partial --------------------------------------------------- async def test_health_partial_renders(self, admin_client, db): with ( patch("padelnomics.admin.pseo_routes.discover_templates", _discover_mock), patch("padelnomics.admin.pseo_routes.get_all_health_issues", _health_mock), ): resp = await admin_client.get("/admin/pseo/health") assert resp.status_code == 200 # -- Content gaps HTMX partial --------------------------------------------- async def test_gaps_unknown_template_returns_404(self, admin_client, db): def _raise(slug): raise FileNotFoundError("not found") with patch("padelnomics.admin.pseo_routes.load_template", _raise): resp = await admin_client.get("/admin/pseo/gaps/no-such-template") assert resp.status_code == 404 async def test_gaps_partial_renders(self, admin_client, db): with ( patch("padelnomics.admin.pseo_routes.load_template", _load_template_mock), patch("padelnomics.admin.pseo_routes.get_content_gaps", _gaps_two_mock), ): resp = await admin_client.get("/admin/pseo/gaps/city-cost-de") assert resp.status_code == 200 text = await resp.get_data(as_text=True) # Should show gap count or row content assert "munich" in text or "missing" in text.lower() async def test_gaps_empty_shows_no_gaps_message(self, admin_client, db): with ( patch("padelnomics.admin.pseo_routes.load_template", _load_template_mock), patch("padelnomics.admin.pseo_routes.get_content_gaps", _gaps_empty_mock), ): resp = await admin_client.get("/admin/pseo/gaps/city-cost-de") assert resp.status_code == 200 text = await resp.get_data(as_text=True) assert "No gaps" in text or "all" in text.lower() # -- Generate gaps POST ---------------------------------------------------- async def test_generate_gaps_redirects(self, admin_client, db): async with admin_client.session_transaction() as sess: sess["csrf_token"] = "test" with ( patch("padelnomics.admin.pseo_routes.load_template", _load_template_mock), patch("padelnomics.admin.pseo_routes.get_content_gaps", _gaps_two_mock), ): resp = await admin_client.post( "/admin/pseo/gaps/city-cost-de/generate", form={"csrf_token": "test"}, ) assert resp.status_code == 302 async def test_generate_gaps_enqueues_task(self, admin_client, db): async with admin_client.session_transaction() as sess: sess["csrf_token"] = "test" with ( patch("padelnomics.admin.pseo_routes.load_template", _load_template_mock), patch("padelnomics.admin.pseo_routes.get_content_gaps", _gaps_two_mock), ): await admin_client.post( "/admin/pseo/gaps/city-cost-de/generate", form={"csrf_token": "test"}, ) tasks = await core.fetch_all( "SELECT task_name FROM tasks WHERE task_name = 'generate_articles'" ) assert len(tasks) == 1 async def test_generate_gaps_no_gaps_redirects_without_task(self, admin_client, db): async with admin_client.session_transaction() as sess: sess["csrf_token"] = "test" with ( patch("padelnomics.admin.pseo_routes.load_template", _load_template_mock), patch("padelnomics.admin.pseo_routes.get_content_gaps", _gaps_empty_mock), ): resp = await admin_client.post( "/admin/pseo/gaps/city-cost-de/generate", form={"csrf_token": "test"}, ) assert resp.status_code == 302 tasks = await core.fetch_all( "SELECT task_name FROM tasks WHERE task_name = 'generate_articles'" ) assert len(tasks) == 0 # -- Jobs list ------------------------------------------------------------- async def test_jobs_renders_empty(self, admin_client, db): resp = await admin_client.get("/admin/pseo/jobs") assert resp.status_code == 200 text = await resp.get_data(as_text=True) assert "Generation Jobs" in text async def test_jobs_shows_task_row(self, admin_client, db): await _insert_task(status="complete", progress_current=20, progress_total=20) resp = await admin_client.get("/admin/pseo/jobs") assert resp.status_code == 200 text = await resp.get_data(as_text=True) assert "Complete" in text # -- Job status HTMX polled ------------------------------------------------ async def test_job_status_not_found_returns_404(self, admin_client, db): resp = await admin_client.get("/admin/pseo/jobs/9999/status") assert resp.status_code == 404 async def test_job_status_renders_pending(self, admin_client, db): job_id = await _insert_task( status="pending", progress_current=5, progress_total=20 ) resp = await admin_client.get(f"/admin/pseo/jobs/{job_id}/status") assert resp.status_code == 200 text = await resp.get_data(as_text=True) assert "Running" in text async def test_job_status_renders_complete(self, admin_client, db): job_id = await _insert_task( status="complete", progress_current=20, progress_total=20 ) resp = await admin_client.get(f"/admin/pseo/jobs/{job_id}/status") assert resp.status_code == 200 text = await resp.get_data(as_text=True) assert "Complete" in text async def test_job_status_complete_no_htmx_poll_trigger(self, admin_client, db): """A completed job should not include hx-trigger="every 2s" (stops HTMX polling).""" job_id = await _insert_task( status="complete", progress_current=20, progress_total=20 ) resp = await admin_client.get(f"/admin/pseo/jobs/{job_id}/status") text = await resp.get_data(as_text=True) assert "every 2s" not in text async def test_job_status_pending_includes_htmx_poll_trigger(self, admin_client, db): """A pending job should include hx-trigger="every 2s" (keeps HTMX polling).""" job_id = await _insert_task( status="pending", progress_current=0, progress_total=20 ) resp = await admin_client.get(f"/admin/pseo/jobs/{job_id}/status") text = await resp.get_data(as_text=True) assert "every 2s" in text