Files
padelnomics/web/tests/test_noindex.py
Deeman c269caf048 fix(lint): resolve all ruff E402/F401/F841/I001 errors
- Move logger= after imports in planner/routes.py and setup_paddle.py
- Add # noqa: E402 to intentional post-setup imports (app.py, core.py,
  migrate.py, test_supervisor.py)
- Fix unused cursor variables (test_noindex.py) → _
- Move stray csv import to top of test_outreach.py
- Auto-sort import blocks (test_email_templates, test_noindex, test_outreach)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 11:52:02 +01:00

192 lines
7.9 KiB
Python

"""
Tests for pSEO article noindex feature.
Covers:
- NOINDEX_THRESHOLDS: lambda functions evaluate correctly per template
- Sitemap excludes articles with noindex=1
- Article detail page emits <meta name="robots" content="noindex, follow"> for noindex articles
- Article detail page has no robots meta tag for indexable articles
"""
from datetime import UTC, datetime
from padelnomics.content import NOINDEX_THRESHOLDS
# ── Threshold unit tests ─────────────────────────────────────────────────────
class TestNoindexThresholds:
    """Unit tests for the per-template NOINDEX_THRESHOLDS lambda table."""

    def test_city_pricing_low_venue_count_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["city-pricing"]
        # Fewer than 3 venues is treated as thin content.
        for count in (0, 1, 2):
            assert is_thin({"venue_count": count}) is True

    def test_city_pricing_sufficient_venues_is_indexable(self):
        is_thin = NOINDEX_THRESHOLDS["city-pricing"]
        for count in (3, 10):
            assert is_thin({"venue_count": count}) is False

    def test_city_pricing_missing_venue_count_treated_as_zero(self):
        is_thin = NOINDEX_THRESHOLDS["city-pricing"]
        # Absent or None venue_count behaves like zero venues.
        for payload in ({}, {"venue_count": None}):
            assert is_thin(payload) is True

    def test_city_cost_de_partial_data_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["city-cost-de"]
        for confidence in (0.0, 0.5, 0.99):
            assert is_thin({"data_confidence": confidence}) is True

    def test_city_cost_de_full_confidence_is_indexable(self):
        is_thin = NOINDEX_THRESHOLDS["city-cost-de"]
        assert is_thin({"data_confidence": 1.0}) is False

    def test_city_cost_de_missing_confidence_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["city-cost-de"]
        for payload in ({}, {"data_confidence": None}):
            assert is_thin(payload) is True

    def test_country_overview_low_venues_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["country-overview"]
        for total in (0, 4):
            assert is_thin({"total_venues": total}) is True

    def test_country_overview_sufficient_venues_is_indexable(self):
        is_thin = NOINDEX_THRESHOLDS["country-overview"]
        for total in (5, 100):
            assert is_thin({"total_venues": total}) is False

    def test_unknown_template_slug_has_no_threshold(self):
        # Templates without an entry get no automatic noindex decision.
        for slug in ("manual", "unknown-template"):
            assert slug not in NOINDEX_THRESHOLDS
# ── Sitemap exclusion ────────────────────────────────────────────────────────
async def _insert_article(
db,
url_path: str = "/markets/de/berlin",
title: str = "Test Article",
language: str = "en",
noindex: int = 0,
) -> int:
"""Insert a published article row and return its id."""
# Use a past published_at in SQLite-compatible format (space separator, no tz).
# SQLite's datetime('now') returns "YYYY-MM-DD HH:MM:SS" with a space.
# ISO format with T is lexicographically AFTER the space format for the
# same instant, so current-time ISO strings fail the <= datetime('now') check.
published_at = "2020-01-01 08:00:00"
created_at = datetime.now(UTC).isoformat()
async with db.execute(
"""INSERT INTO articles
(url_path, slug, title, meta_description, status, published_at,
template_slug, language, noindex, created_at)
VALUES (?, ?, ?, '', 'published', ?, 'city-pricing', ?, ?, ?)""",
(url_path, f"slug-{url_path.replace('/', '-')}", title,
published_at, language, noindex, created_at),
) as cursor:
article_id = cursor.lastrowid
await db.commit()
return article_id
class TestSitemapNoindex:
    """Sitemap generation must respect the articles.noindex flag."""

    @staticmethod
    async def _sitemap_body(client) -> str:
        """Fetch /sitemap.xml and return its decoded body."""
        resp = await client.get("/sitemap.xml")
        return (await resp.data).decode()

    async def test_indexable_article_in_sitemap(self, client, db):
        """Article with noindex=0 should appear in sitemap."""
        await _insert_article(db, url_path="/markets/en/berlin", noindex=0)
        sitemap = await self._sitemap_body(client)
        assert "/markets/en/berlin" in sitemap

    async def test_noindex_article_excluded_from_sitemap(self, client, db):
        """Article with noindex=1 must NOT appear in sitemap."""
        await _insert_article(db, url_path="/markets/en/thin-city", noindex=1)
        sitemap = await self._sitemap_body(client)
        assert "/markets/en/thin-city" not in sitemap

    async def test_mixed_articles_only_indexable_in_sitemap(self, client, db):
        """Only indexable articles appear; noindex articles are silently dropped."""
        await _insert_article(db, url_path="/markets/en/good-city", noindex=0)
        await _insert_article(db, url_path="/markets/en/bad-city", noindex=1)
        sitemap = await self._sitemap_body(client)
        assert "good-city" in sitemap
        assert "bad-city" not in sitemap
# ── Article detail robots meta tag ──────────────────────────────────────────
class TestArticleDetailRobotsTag:
    """
    Test that the article detail template emits (or omits) the robots meta tag.
    We test via the content blueprint's article route.
    Routes.py imports BUILD_DIR from content/__init__.py at module load time, so
    we must patch padelnomics.content.routes.BUILD_DIR (the local binding), not
    padelnomics.content.BUILD_DIR.
    """

    @staticmethod
    async def _publish_article(db, tmp_path, monkeypatch, *,
                               url_path, slug, title, noindex):
        """Shared setup for both tests (was duplicated inline): write the
        rendered HTML fixture, point BUILD_DIR at tmp_path, and insert a
        published article row with the given noindex flag.

        published_at is a past date in SQLite's space-separator format so
        the row passes the <= datetime('now') publication check.
        """
        import padelnomics.content.routes as routes_mod
        build_dir = tmp_path / "en"
        build_dir.mkdir(parents=True)
        (build_dir / f"{slug}.html").write_text("<p>Article body</p>")
        monkeypatch.setattr(routes_mod, "BUILD_DIR", tmp_path)
        async with db.execute(
            """INSERT INTO articles
            (url_path, slug, title, meta_description, status, published_at,
            template_slug, language, noindex, created_at)
            VALUES (?, ?, ?, '', 'published', '2020-01-01 08:00:00',
            'city-pricing', 'en', ?, datetime('now'))""",
            (url_path, slug, title, noindex),
        ):
            pass
        await db.commit()

    async def test_noindex_article_has_robots_meta(self, client, db, tmp_path, monkeypatch):
        """Article with noindex=1 → <meta name="robots" content="noindex, follow"> in HTML."""
        url_path = "/markets/noindex-test"
        await self._publish_article(
            db, tmp_path, monkeypatch,
            url_path=url_path,
            slug="city-pricing-en-noindex-test",
            title="Noindex Test",
            noindex=1,
        )
        resp = await client.get(f"/en{url_path}")
        assert resp.status_code == 200
        html = (await resp.data).decode()
        assert 'name="robots"' in html
        assert "noindex" in html

    async def test_indexable_article_has_no_robots_meta(self, client, db, tmp_path, monkeypatch):
        """Article with noindex=0 → no robots meta tag in HTML."""
        url_path = "/markets/indexable-test"
        await self._publish_article(
            db, tmp_path, monkeypatch,
            url_path=url_path,
            slug="city-pricing-en-indexable-test",
            title="Indexable Test",
            noindex=0,
        )
        resp = await client.get(f"/en{url_path}")
        assert resp.status_code == 200
        html = (await resp.data).decode()
        assert 'content="noindex' not in html