""" Tests for pSEO article noindex feature. Covers: - NOINDEX_THRESHOLDS: lambda functions evaluate correctly per template - Sitemap excludes articles with noindex=1 - Article detail page emits for noindex articles - Article detail page has no robots meta tag for indexable articles """ from datetime import UTC, datetime from unittest.mock import AsyncMock, patch import pytest from padelnomics import core from padelnomics.content import NOINDEX_THRESHOLDS # ── Threshold unit tests ───────────────────────────────────────────────────── class TestNoindexThresholds: def test_city_pricing_low_venue_count_is_noindex(self): check = NOINDEX_THRESHOLDS["city-pricing"] assert check({"venue_count": 0}) is True assert check({"venue_count": 1}) is True assert check({"venue_count": 2}) is True def test_city_pricing_sufficient_venues_is_indexable(self): check = NOINDEX_THRESHOLDS["city-pricing"] assert check({"venue_count": 3}) is False assert check({"venue_count": 10}) is False def test_city_pricing_missing_venue_count_treated_as_zero(self): check = NOINDEX_THRESHOLDS["city-pricing"] assert check({}) is True assert check({"venue_count": None}) is True def test_city_cost_de_partial_data_is_noindex(self): check = NOINDEX_THRESHOLDS["city-cost-de"] assert check({"data_confidence": 0.0}) is True assert check({"data_confidence": 0.5}) is True assert check({"data_confidence": 0.99}) is True def test_city_cost_de_full_confidence_is_indexable(self): check = NOINDEX_THRESHOLDS["city-cost-de"] assert check({"data_confidence": 1.0}) is False def test_city_cost_de_missing_confidence_is_noindex(self): check = NOINDEX_THRESHOLDS["city-cost-de"] assert check({}) is True assert check({"data_confidence": None}) is True def test_country_overview_low_venues_is_noindex(self): check = NOINDEX_THRESHOLDS["country-overview"] assert check({"total_venues": 0}) is True assert check({"total_venues": 4}) is True def test_country_overview_sufficient_venues_is_indexable(self): check = NOINDEX_THRESHOLDS["country-overview"] assert check({"total_venues": 5}) is False assert check({"total_venues": 100}) is False def test_unknown_template_slug_has_no_threshold(self): assert "manual" not in NOINDEX_THRESHOLDS assert "unknown-template" not in NOINDEX_THRESHOLDS # ── Sitemap exclusion ──────────────────────────────────────────────────────── async def _insert_article( db, url_path: str = "/markets/de/berlin", title: str = "Test Article", language: str = "en", noindex: int = 0, ) -> int: """Insert a published article row and return its id.""" # Use a past published_at in SQLite-compatible format (space separator, no tz). # SQLite's datetime('now') returns "YYYY-MM-DD HH:MM:SS" with a space. # ISO format with T is lexicographically AFTER the space format for the # same instant, so current-time ISO strings fail the <= datetime('now') check. published_at = "2020-01-01 08:00:00" created_at = datetime.now(UTC).isoformat() async with db.execute( """INSERT INTO articles (url_path, slug, title, meta_description, status, published_at, template_slug, language, noindex, created_at) VALUES (?, ?, ?, '', 'published', ?, 'city-pricing', ?, ?, ?)""", (url_path, f"slug-{url_path.replace('/', '-')}", title, published_at, language, noindex, created_at), ) as cursor: article_id = cursor.lastrowid await db.commit() return article_id class TestSitemapNoindex: async def test_indexable_article_in_sitemap(self, client, db): """Article with noindex=0 should appear in sitemap.""" await _insert_article(db, url_path="/markets/en/berlin", noindex=0) resp = await client.get("/sitemap.xml") xml = (await resp.data).decode() assert "/markets/en/berlin" in xml async def test_noindex_article_excluded_from_sitemap(self, client, db): """Article with noindex=1 must NOT appear in sitemap.""" await _insert_article(db, url_path="/markets/en/thin-city", noindex=1) resp = await client.get("/sitemap.xml") xml = (await resp.data).decode() assert "/markets/en/thin-city" not in xml async def test_mixed_articles_only_indexable_in_sitemap(self, client, db): """Only indexable articles appear; noindex articles are silently dropped.""" await _insert_article(db, url_path="/markets/en/good-city", noindex=0) await _insert_article(db, url_path="/markets/en/bad-city", noindex=1) resp = await client.get("/sitemap.xml") xml = (await resp.data).decode() assert "good-city" in xml assert "bad-city" not in xml # ── Article detail robots meta tag ────────────────────────────────────────── class TestArticleDetailRobotsTag: """ Test that the article detail template emits (or omits) the robots meta tag. We test via the content blueprint's article route. Routes.py imports BUILD_DIR from content/__init__.py at module load time, so we must patch padelnomics.content.routes.BUILD_DIR (the local binding), not padelnomics.content.BUILD_DIR. """ async def test_noindex_article_has_robots_meta(self, client, db, tmp_path, monkeypatch): """Article with noindex=1 → in HTML.""" import padelnomics.content.routes as routes_mod build_dir = tmp_path / "en" build_dir.mkdir(parents=True) url_path = "/markets/noindex-test" slug = "city-pricing-en-noindex-test" (build_dir / f"{slug}.html").write_text("
Article body
") monkeypatch.setattr(routes_mod, "BUILD_DIR", tmp_path) # Use past published_at in SQLite space-separator format async with db.execute( """INSERT INTO articles (url_path, slug, title, meta_description, status, published_at, template_slug, language, noindex, created_at) VALUES (?, ?, 'Noindex Test', '', 'published', '2020-01-01 08:00:00', 'city-pricing', 'en', 1, datetime('now'))""", (url_path, slug), ) as cursor: pass await db.commit() resp = await client.get(f"/en{url_path}") assert resp.status_code == 200 html = (await resp.data).decode() assert 'name="robots"' in html assert "noindex" in html async def test_indexable_article_has_no_robots_meta(self, client, db, tmp_path, monkeypatch): """Article with noindex=0 → no robots meta tag in HTML.""" import padelnomics.content.routes as routes_mod build_dir = tmp_path / "en" build_dir.mkdir(parents=True) url_path = "/markets/indexable-test" slug = "city-pricing-en-indexable-test" (build_dir / f"{slug}.html").write_text("Article body
") monkeypatch.setattr(routes_mod, "BUILD_DIR", tmp_path) async with db.execute( """INSERT INTO articles (url_path, slug, title, meta_description, status, published_at, template_slug, language, noindex, created_at) VALUES (?, ?, 'Indexable Test', '', 'published', '2020-01-01 08:00:00', 'city-pricing', 'en', 0, datetime('now'))""", (url_path, slug), ) as cursor: pass await db.commit() resp = await client.get(f"/en{url_path}") assert resp.status_code == 200 html = (await resp.data).decode() assert 'content="noindex' not in html