feat(outreach+pseo): follow-up scheduling, activity timeline, noindex articles (subtasks 1-9)

Feature A — Outreach follow-up scheduling + activity timeline:
- Migration 0025: follow_up_at column on suppliers
- POST /admin/outreach/<id>/follow-up route (HTMX date picker, updates row)
- get_follow_up_due_count() query + amber banner on /admin/outreach
- ?follow_up=due / ?follow_up=set filters in get_outreach_suppliers()
- Follow-up column in outreach_results.html + outreach_row.html date input
- Activity timeline on supplier_detail.html — merges email_log (sent outreach)
  and inbound_emails (received) by contact_email, sorted by date

Feature B — pSEO article noindex:
- Migration 0025: noindex column on articles (default 0)
- NOINDEX_THRESHOLDS dict in content/__init__.py (per-template thresholds)
- generate_articles() upsert now stores noindex = 1 for thin-data articles
- <meta name="robots" content="noindex, follow"> in article_detail.html (conditional)
- sitemap.py excludes noindex=1 articles from sitemap.xml
- pSEO dashboard noindex count card; article_row.html noindex badge
- 73 new tests (test_outreach.py + test_noindex.py), 1377 total, 0 failures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-25 16:12:21 +01:00
parent efaba2cb76
commit ea06dd0689
14 changed files with 584 additions and 14 deletions

196
web/tests/test_noindex.py Normal file
View File

@@ -0,0 +1,196 @@
"""
Tests for pSEO article noindex feature.
Covers:
- NOINDEX_THRESHOLDS: lambda functions evaluate correctly per template
- Sitemap excludes articles with noindex=1
- Article detail page emits <meta name="robots" content="noindex, follow"> for noindex articles
- Article detail page has no robots meta tag for indexable articles
"""
from datetime import UTC, datetime
from unittest.mock import AsyncMock, patch
import pytest
from padelnomics import core
from padelnomics.content import NOINDEX_THRESHOLDS
# ── Threshold unit tests ─────────────────────────────────────────────────────
class TestNoindexThresholds:
    """Unit tests for the per-template thinness callables in NOINDEX_THRESHOLDS."""

    def test_city_pricing_low_venue_count_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["city-pricing"]
        # Fewer than 3 venues → thin content, must be noindexed.
        for count in (0, 1, 2):
            assert is_thin({"venue_count": count}) is True

    def test_city_pricing_sufficient_venues_is_indexable(self):
        is_thin = NOINDEX_THRESHOLDS["city-pricing"]
        for count in (3, 10):
            assert is_thin({"venue_count": count}) is False

    def test_city_pricing_missing_venue_count_treated_as_zero(self):
        is_thin = NOINDEX_THRESHOLDS["city-pricing"]
        # Absent or None venue_count must behave like zero venues.
        for payload in ({}, {"venue_count": None}):
            assert is_thin(payload) is True

    def test_city_cost_de_partial_data_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["city-cost-de"]
        # Anything below full confidence is treated as thin.
        for confidence in (0.0, 0.5, 0.99):
            assert is_thin({"data_confidence": confidence}) is True

    def test_city_cost_de_full_confidence_is_indexable(self):
        is_thin = NOINDEX_THRESHOLDS["city-cost-de"]
        assert is_thin({"data_confidence": 1.0}) is False

    def test_city_cost_de_missing_confidence_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["city-cost-de"]
        for payload in ({}, {"data_confidence": None}):
            assert is_thin(payload) is True

    def test_country_overview_low_venues_is_noindex(self):
        is_thin = NOINDEX_THRESHOLDS["country-overview"]
        for total in (0, 4):
            assert is_thin({"total_venues": total}) is True

    def test_country_overview_sufficient_venues_is_indexable(self):
        is_thin = NOINDEX_THRESHOLDS["country-overview"]
        for total in (5, 100):
            assert is_thin({"total_venues": total}) is False

    def test_unknown_template_slug_has_no_threshold(self):
        # Templates without an entry are always indexable by omission.
        for slug in ("manual", "unknown-template"):
            assert slug not in NOINDEX_THRESHOLDS
# ── Sitemap exclusion ────────────────────────────────────────────────────────
async def _insert_article(
    db,
    url_path: str = "/markets/de/berlin",
    title: str = "Test Article",
    language: str = "en",
    noindex: int = 0,
) -> int:
    """Insert a published article row and return its id.

    published_at is a fixed past timestamp in SQLite's own format
    ("YYYY-MM-DD HH:MM:SS", space separator, no timezone): SQLite's
    datetime('now') emits that shape, and an ISO string with a "T" sorts
    lexicographically AFTER the space form for the same instant, so a
    current-time ISO value would fail the <= datetime('now') publication
    check.
    """
    slug = f"slug-{url_path.replace('/', '-')}"
    row = (
        url_path,
        slug,
        title,
        "2020-01-01 08:00:00",          # safely in the past, SQLite format
        language,
        noindex,
        datetime.now(UTC).isoformat(),  # created_at: format is not compared
    )
    async with db.execute(
        """INSERT INTO articles
            (url_path, slug, title, meta_description, status, published_at,
             template_slug, language, noindex, created_at)
            VALUES (?, ?, ?, '', 'published', ?, 'city-pricing', ?, ?, ?)""",
        row,
    ) as cursor:
        article_id = cursor.lastrowid
    await db.commit()
    return article_id
class TestSitemapNoindex:
    """Sitemap generation must drop noindex=1 articles and keep the rest."""

    async def _fetch_sitemap(self, client) -> str:
        # Shared fetch/decode step for all three tests.
        response = await client.get("/sitemap.xml")
        return (await response.data).decode()

    async def test_indexable_article_in_sitemap(self, client, db):
        """Article with noindex=0 should appear in sitemap."""
        await _insert_article(db, url_path="/markets/en/berlin", noindex=0)
        sitemap = await self._fetch_sitemap(client)
        assert "/markets/en/berlin" in sitemap

    async def test_noindex_article_excluded_from_sitemap(self, client, db):
        """Article with noindex=1 must NOT appear in sitemap."""
        await _insert_article(db, url_path="/markets/en/thin-city", noindex=1)
        sitemap = await self._fetch_sitemap(client)
        assert "/markets/en/thin-city" not in sitemap

    async def test_mixed_articles_only_indexable_in_sitemap(self, client, db):
        """Only indexable articles appear; noindex articles are silently dropped."""
        await _insert_article(db, url_path="/markets/en/good-city", noindex=0)
        await _insert_article(db, url_path="/markets/en/bad-city", noindex=1)
        sitemap = await self._fetch_sitemap(client)
        assert "good-city" in sitemap
        assert "bad-city" not in sitemap
# ── Article detail robots meta tag ──────────────────────────────────────────
class TestArticleDetailRobotsTag:
    """
    Verify the article detail template emits (or omits) the robots meta tag.

    We go through the content blueprint's article route. routes.py imports
    BUILD_DIR from content/__init__.py at module load time, so we must patch
    padelnomics.content.routes.BUILD_DIR (the local binding), not
    padelnomics.content.BUILD_DIR.
    """

    async def _seed_published_article(
        self, db, tmp_path, monkeypatch, *, url_path: str, slug: str,
        title: str, noindex: int,
    ) -> None:
        """Write the prebuilt HTML file, repoint BUILD_DIR, and insert a
        published article row (past published_at in SQLite's space-separated
        format so the <= datetime('now') publication check passes)."""
        import padelnomics.content.routes as routes_mod

        build_dir = tmp_path / "en"
        build_dir.mkdir(parents=True)
        (build_dir / f"{slug}.html").write_text("<p>Article body</p>")
        # Patch the binding the route actually reads (see class docstring).
        monkeypatch.setattr(routes_mod, "BUILD_DIR", tmp_path)
        # No cursor needed for a plain INSERT — await the execute directly
        # instead of binding an unused context manager.
        await db.execute(
            """INSERT INTO articles
                (url_path, slug, title, meta_description, status, published_at,
                 template_slug, language, noindex, created_at)
                VALUES (?, ?, ?, '', 'published', '2020-01-01 08:00:00',
                        'city-pricing', 'en', ?, datetime('now'))""",
            (url_path, slug, title, noindex),
        )
        await db.commit()

    async def test_noindex_article_has_robots_meta(self, client, db, tmp_path, monkeypatch):
        """Article with noindex=1 → <meta name="robots" content="noindex, follow"> in HTML."""
        url_path = "/markets/noindex-test"
        await self._seed_published_article(
            db, tmp_path, monkeypatch,
            url_path=url_path,
            slug="city-pricing-en-noindex-test",
            title="Noindex Test",
            noindex=1,
        )
        resp = await client.get(f"/en{url_path}")
        assert resp.status_code == 200
        html = (await resp.data).decode()
        assert 'name="robots"' in html
        assert "noindex" in html

    async def test_indexable_article_has_no_robots_meta(self, client, db, tmp_path, monkeypatch):
        """Article with noindex=0 → no robots meta tag in HTML."""
        url_path = "/markets/indexable-test"
        await self._seed_published_article(
            db, tmp_path, monkeypatch,
            url_path=url_path,
            slug="city-pricing-en-indexable-test",
            title="Indexable Test",
            noindex=0,
        )
        resp = await client.get(f"/en{url_path}")
        assert resp.status_code == 200
        html = (await resp.data).decode()
        assert 'content="noindex' not in html