feat(outreach+pseo): follow-up scheduling, activity timeline, noindex articles (subtasks 1-9)

Feature A — Outreach follow-up scheduling + activity timeline:
- Migration 0025: follow_up_at column on suppliers
- POST /admin/outreach/<id>/follow-up route (HTMX date picker, updates row)
- get_follow_up_due_count() query + amber banner on /admin/outreach
- ?follow_up=due / ?follow_up=set filters in get_outreach_suppliers()
- Follow-up column in outreach_results.html + outreach_row.html date input
- Activity timeline on supplier_detail.html — merges email_log (sent outreach)
  and inbound_emails (received) by contact_email, sorted by date

Feature B — pSEO article noindex:
- Migration 0025: noindex column on articles (default 0)
- NOINDEX_THRESHOLDS dict in content/__init__.py (per-template thresholds)
- generate_articles() upsert now stores noindex = 1 for thin-data articles
- <meta name="robots" content="noindex, follow"> in article_detail.html (conditional)
- sitemap.py excludes noindex=1 articles from sitemap.xml
- pSEO dashboard noindex count card; article_row.html noindex badge
- 73 new tests (test_outreach.py + test_noindex.py), 1377 total, 0 failures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-25 16:12:21 +01:00
parent efaba2cb76
commit ea06dd0689
14 changed files with 584 additions and 14 deletions

196
web/tests/test_noindex.py Normal file
View File

@@ -0,0 +1,196 @@
"""
Tests for pSEO article noindex feature.
Covers:
- NOINDEX_THRESHOLDS: lambda functions evaluate correctly per template
- Sitemap excludes articles with noindex=1
- Article detail page emits <meta name="robots" content="noindex, follow"> for noindex articles
- Article detail page has no robots meta tag for indexable articles
"""
from datetime import UTC, datetime
from unittest.mock import AsyncMock, patch
import pytest
from padelnomics import core
from padelnomics.content import NOINDEX_THRESHOLDS
# ── Threshold unit tests ─────────────────────────────────────────────────────
class TestNoindexThresholds:
    """Unit tests for the per-template thinness callables in NOINDEX_THRESHOLDS."""

    def test_city_pricing_low_venue_count_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["city-pricing"]
        # Fewer than 3 venues → thin content, must be noindexed.
        for count in (0, 1, 2):
            assert is_thin({"venue_count": count}) is True

    def test_city_pricing_sufficient_venues_is_indexable(self):
        is_thin = NOINDEX_THRESHOLDS["city-pricing"]
        for count in (3, 10):
            assert is_thin({"venue_count": count}) is False

    def test_city_pricing_missing_venue_count_treated_as_zero(self):
        is_thin = NOINDEX_THRESHOLDS["city-pricing"]
        # Absent or None venue_count must behave like zero venues.
        for payload in ({}, {"venue_count": None}):
            assert is_thin(payload) is True

    def test_city_cost_de_partial_data_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["city-cost-de"]
        # Anything below full confidence is treated as thin.
        for confidence in (0.0, 0.5, 0.99):
            assert is_thin({"data_confidence": confidence}) is True

    def test_city_cost_de_full_confidence_is_indexable(self):
        is_thin = NOINDEX_THRESHOLDS["city-cost-de"]
        assert is_thin({"data_confidence": 1.0}) is False

    def test_city_cost_de_missing_confidence_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["city-cost-de"]
        for payload in ({}, {"data_confidence": None}):
            assert is_thin(payload) is True

    def test_country_overview_low_venues_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["country-overview"]
        for total in (0, 4):
            assert is_thin({"total_venues": total}) is True

    def test_country_overview_sufficient_venues_is_indexable(self):
        is_thin = NOINDEX_THRESHOLDS["country-overview"]
        for total in (5, 100):
            assert is_thin({"total_venues": total}) is False

    def test_unknown_template_slug_has_no_threshold(self):
        # Templates without an entry are always indexable by omission.
        for slug in ("manual", "unknown-template"):
            assert slug not in NOINDEX_THRESHOLDS
# ── Sitemap exclusion ────────────────────────────────────────────────────────
async def _insert_article(
    db,
    url_path: str = "/markets/de/berlin",
    title: str = "Test Article",
    language: str = "en",
    noindex: int = 0,
) -> int:
    """Insert a published article row and return its id.

    published_at is a fixed past timestamp in SQLite's own format
    ("YYYY-MM-DD HH:MM:SS", space separator, no timezone): SQLite's
    datetime('now') emits that shape, and an ISO string with a "T" sorts
    lexicographically AFTER the space form for the same instant, so a
    current-time ISO value would fail the <= datetime('now') publication
    check.
    """
    slug = f"slug-{url_path.replace('/', '-')}"
    row = (
        url_path,
        slug,
        title,
        "2020-01-01 08:00:00",          # safely in the past, SQLite format
        language,
        noindex,
        datetime.now(UTC).isoformat(),  # created_at: format is not compared
    )
    async with db.execute(
        """INSERT INTO articles
            (url_path, slug, title, meta_description, status, published_at,
             template_slug, language, noindex, created_at)
            VALUES (?, ?, ?, '', 'published', ?, 'city-pricing', ?, ?, ?)""",
        row,
    ) as cursor:
        article_id = cursor.lastrowid
    await db.commit()
    return article_id
class TestSitemapNoindex:
    """Sitemap generation must drop noindex=1 articles and keep the rest."""

    async def _fetch_sitemap(self, client) -> str:
        # Shared fetch/decode step for all three tests.
        response = await client.get("/sitemap.xml")
        return (await response.data).decode()

    async def test_indexable_article_in_sitemap(self, client, db):
        """Article with noindex=0 should appear in sitemap."""
        await _insert_article(db, url_path="/markets/en/berlin", noindex=0)
        sitemap = await self._fetch_sitemap(client)
        assert "/markets/en/berlin" in sitemap

    async def test_noindex_article_excluded_from_sitemap(self, client, db):
        """Article with noindex=1 must NOT appear in sitemap."""
        await _insert_article(db, url_path="/markets/en/thin-city", noindex=1)
        sitemap = await self._fetch_sitemap(client)
        assert "/markets/en/thin-city" not in sitemap

    async def test_mixed_articles_only_indexable_in_sitemap(self, client, db):
        """Only indexable articles appear; noindex articles are silently dropped."""
        await _insert_article(db, url_path="/markets/en/good-city", noindex=0)
        await _insert_article(db, url_path="/markets/en/bad-city", noindex=1)
        sitemap = await self._fetch_sitemap(client)
        assert "good-city" in sitemap
        assert "bad-city" not in sitemap
# ── Article detail robots meta tag ──────────────────────────────────────────
class TestArticleDetailRobotsTag:
    """
    Verify the article detail template emits (or omits) the robots meta tag.

    We go through the content blueprint's article route. routes.py imports
    BUILD_DIR from content/__init__.py at module load time, so we must patch
    padelnomics.content.routes.BUILD_DIR (the local binding), not
    padelnomics.content.BUILD_DIR.
    """

    async def _seed_published_article(
        self, db, tmp_path, monkeypatch, *, url_path: str, slug: str,
        title: str, noindex: int,
    ) -> None:
        """Write the prebuilt HTML file, repoint BUILD_DIR, and insert a
        published article row (past published_at in SQLite's space-separated
        format so the <= datetime('now') publication check passes)."""
        import padelnomics.content.routes as routes_mod

        build_dir = tmp_path / "en"
        build_dir.mkdir(parents=True)
        (build_dir / f"{slug}.html").write_text("<p>Article body</p>")
        # Patch the binding the route actually reads (see class docstring).
        monkeypatch.setattr(routes_mod, "BUILD_DIR", tmp_path)
        # No cursor needed for a plain INSERT — await the execute directly
        # instead of binding an unused context manager.
        await db.execute(
            """INSERT INTO articles
                (url_path, slug, title, meta_description, status, published_at,
                 template_slug, language, noindex, created_at)
                VALUES (?, ?, ?, '', 'published', '2020-01-01 08:00:00',
                        'city-pricing', 'en', ?, datetime('now'))""",
            (url_path, slug, title, noindex),
        )
        await db.commit()

    async def test_noindex_article_has_robots_meta(self, client, db, tmp_path, monkeypatch):
        """Article with noindex=1 → <meta name="robots" content="noindex, follow"> in HTML."""
        url_path = "/markets/noindex-test"
        await self._seed_published_article(
            db, tmp_path, monkeypatch,
            url_path=url_path,
            slug="city-pricing-en-noindex-test",
            title="Noindex Test",
            noindex=1,
        )
        resp = await client.get(f"/en{url_path}")
        assert resp.status_code == 200
        html = (await resp.data).decode()
        assert 'name="robots"' in html
        assert "noindex" in html

    async def test_indexable_article_has_no_robots_meta(self, client, db, tmp_path, monkeypatch):
        """Article with noindex=0 → no robots meta tag in HTML."""
        url_path = "/markets/indexable-test"
        await self._seed_published_article(
            db, tmp_path, monkeypatch,
            url_path=url_path,
            slug="city-pricing-en-indexable-test",
            title="Indexable Test",
            noindex=0,
        )
        resp = await client.get(f"/en{url_path}")
        assert resp.status_code == 200
        html = (await resp.data).decode()
        assert 'content="noindex' not in html