"""
Tests for pSEO article noindex feature.

Covers:
- NOINDEX_THRESHOLDS: lambda functions evaluate correctly per template
- Sitemap excludes articles with noindex=1
- Article detail page emits a robots "noindex" meta tag for noindex articles
- Article detail page has no robots meta tag for indexable articles
"""
from datetime import UTC, datetime

from padelnomics.content import NOINDEX_THRESHOLDS


# ── Threshold unit tests ─────────────────────────────────────────────────────


class TestNoindexThresholds:
    """Each NOINDEX_THRESHOLDS entry is a callable: context dict → bool.

    A return value of True means the article should be marked noindex.
    """

    def test_city_pricing_low_venue_count_is_noindex(self):
        """city-pricing: venue counts 0, 1 and 2 are noindex."""
        check = NOINDEX_THRESHOLDS["city-pricing"]
        assert check({"venue_count": 0}) is True
        assert check({"venue_count": 1}) is True
        assert check({"venue_count": 2}) is True

    def test_city_pricing_sufficient_venues_is_indexable(self):
        """city-pricing: venue counts of 3 and above are indexable."""
        check = NOINDEX_THRESHOLDS["city-pricing"]
        assert check({"venue_count": 3}) is False
        assert check({"venue_count": 10}) is False

    def test_city_pricing_missing_venue_count_treated_as_zero(self):
        """city-pricing: an absent or None venue_count is noindex."""
        check = NOINDEX_THRESHOLDS["city-pricing"]
        assert check({}) is True
        assert check({"venue_count": None}) is True

    def test_city_cost_de_partial_data_is_noindex(self):
        """city-cost-de: any data_confidence below 1.0 is noindex."""
        check = NOINDEX_THRESHOLDS["city-cost-de"]
        assert check({"data_confidence": 0.0}) is True
        assert check({"data_confidence": 0.5}) is True
        assert check({"data_confidence": 0.99}) is True

    def test_city_cost_de_full_confidence_is_indexable(self):
        """city-cost-de: data_confidence of exactly 1.0 is indexable."""
        check = NOINDEX_THRESHOLDS["city-cost-de"]
        assert check({"data_confidence": 1.0}) is False

    def test_city_cost_de_missing_confidence_is_noindex(self):
        """city-cost-de: an absent or None data_confidence is noindex."""
        check = NOINDEX_THRESHOLDS["city-cost-de"]
        assert check({}) is True
        assert check({"data_confidence": None}) is True

    def test_country_overview_low_venues_is_noindex(self):
        """country-overview: total_venues of 0 and 4 are noindex."""
        check = NOINDEX_THRESHOLDS["country-overview"]
        assert check({"total_venues": 0}) is True
        assert check({"total_venues": 4}) is True

    def test_country_overview_sufficient_venues_is_indexable(self):
        """country-overview: total_venues of 5 and above are indexable."""
        check = NOINDEX_THRESHOLDS["country-overview"]
        assert check({"total_venues": 5}) is False
        assert check({"total_venues": 100}) is False

    def test_unknown_template_slug_has_no_threshold(self):
        """Slugs without a registered threshold are absent from the mapping."""
        assert "manual" not in NOINDEX_THRESHOLDS
        assert "unknown-template" not in NOINDEX_THRESHOLDS


# ── Sitemap exclusion ────────────────────────────────────────────────────────


async def _insert_article(
    db,
    url_path: str = "/markets/de/berlin",
    title: str = "Test Article",
    language: str = "en",
    noindex: int = 0,
) -> int:
    """Insert a published 'city-pricing' article row and return its id.

    The slug is derived from ``url_path`` by replacing every "/" with "-"
    and prefixing "slug-".
    """
    # Use a past published_at in SQLite-compatible format (space separator, no tz).
    # SQLite's datetime('now') returns "YYYY-MM-DD HH:MM:SS" with a space.
    # ISO format with T is lexicographically AFTER the space format for the
    # same instant, so current-time ISO strings fail the <= datetime('now') check.
    published_at = "2020-01-01 08:00:00"
    created_at = datetime.now(UTC).isoformat()
    async with db.execute(
        """INSERT INTO articles
           (url_path, slug, title, meta_description, status, published_at,
            template_slug, language, noindex, created_at)
           VALUES (?, ?, ?, '', 'published', ?, 'city-pricing', ?, ?, ?)""",
        (
            url_path,
            f"slug-{url_path.replace('/', '-')}",
            title,
            published_at,
            language,
            noindex,
            created_at,
        ),
    ) as cursor:
        article_id = cursor.lastrowid
    await db.commit()
    return article_id


class TestSitemapNoindex:
    """The /sitemap.xml response lists only articles with noindex=0."""

    async def test_indexable_article_in_sitemap(self, client, db):
        """Article with noindex=0 should appear in sitemap."""
        await _insert_article(db, url_path="/markets/en/berlin", noindex=0)
        resp = await client.get("/sitemap.xml")
        assert resp.status_code == 200
        xml = (await resp.data).decode()
        assert "/markets/en/berlin" in xml

    async def test_noindex_article_excluded_from_sitemap(self, client, db):
        """Article with noindex=1 must NOT appear in sitemap."""
        await _insert_article(db, url_path="/markets/en/thin-city", noindex=1)
        resp = await client.get("/sitemap.xml")
        # Without this check the "not in" assertion below would pass
        # vacuously on an error response.
        assert resp.status_code == 200
        xml = (await resp.data).decode()
        assert "/markets/en/thin-city" not in xml

    async def test_mixed_articles_only_indexable_in_sitemap(self, client, db):
        """Only indexable articles appear; noindex articles are silently dropped."""
        await _insert_article(db, url_path="/markets/en/good-city", noindex=0)
        await _insert_article(db, url_path="/markets/en/bad-city", noindex=1)
        resp = await client.get("/sitemap.xml")
        assert resp.status_code == 200
        xml = (await resp.data).decode()
        assert "good-city" in xml
        assert "bad-city" not in xml


# ── Article detail robots meta tag ──────────────────────────────────────────


class TestArticleDetailRobotsTag:
    """
    Test that the article detail template emits (or omits) the robots meta tag.

    We test via the content blueprint's article route. Routes.py imports
    BUILD_DIR from content/__init__.py at module load time, so we must patch
    padelnomics.content.routes.BUILD_DIR (the local binding), not
    padelnomics.content.BUILD_DIR.
    """

    async def test_noindex_article_has_robots_meta(self, client, db, tmp_path, monkeypatch):
        """Article with noindex=1 → robots meta tag containing "noindex" in HTML."""
        import padelnomics.content.routes as routes_mod

        # Pre-built article body lives at <BUILD_DIR>/<language>/<slug>.html.
        build_dir = tmp_path / "en"
        build_dir.mkdir(parents=True)

        url_path = "/markets/noindex-test"
        slug = "city-pricing-en-noindex-test"
        # NOTE(review): this fragment may originally have been wrapped in HTML
        # tags that were lost — confirm against version control.
        (build_dir / f"{slug}.html").write_text("\nArticle body\n")

        monkeypatch.setattr(routes_mod, "BUILD_DIR", tmp_path)

        # Use past published_at in SQLite space-separator format
        async with db.execute(
            """INSERT INTO articles
               (url_path, slug, title, meta_description, status, published_at,
                template_slug, language, noindex, created_at)
               VALUES (?, ?, 'Noindex Test', '', 'published', '2020-01-01 08:00:00',
                       'city-pricing', 'en', 1, datetime('now'))""",
            (url_path, slug),
        ) as _:
            pass
        await db.commit()

        resp = await client.get(f"/en{url_path}")
        assert resp.status_code == 200
        html = (await resp.data).decode()
        assert 'name="robots"' in html
        assert "noindex" in html

    async def test_indexable_article_has_no_robots_meta(self, client, db, tmp_path, monkeypatch):
        """Article with noindex=0 → no robots meta tag in HTML."""
        import padelnomics.content.routes as routes_mod

        # Pre-built article body lives at <BUILD_DIR>/<language>/<slug>.html.
        build_dir = tmp_path / "en"
        build_dir.mkdir(parents=True)

        url_path = "/markets/indexable-test"
        slug = "city-pricing-en-indexable-test"
        # NOTE(review): this fragment may originally have been wrapped in HTML
        # tags that were lost — confirm against version control.
        (build_dir / f"{slug}.html").write_text("Article body\n")

        monkeypatch.setattr(routes_mod, "BUILD_DIR", tmp_path)

        # Same past published_at as above, but noindex=0.
        async with db.execute(
            """INSERT INTO articles
               (url_path, slug, title, meta_description, status, published_at,
                template_slug, language, noindex, created_at)
               VALUES (?, ?, 'Indexable Test', '', 'published', '2020-01-01 08:00:00',
                       'city-pricing', 'en', 0, datetime('now'))""",
            (url_path, slug),
        ) as _:
            pass
        await db.commit()

        resp = await client.get(f"/en{url_path}")
        assert resp.status_code == 200
        html = (await resp.data).decode()
        # Only the noindex directive is checked for absence here.
        assert 'content="noindex' not in html