feat: outreach follow-up scheduling, activity timeline, and pSEO noindex (migration 0025)

Feature A — Outreach follow-up + activity timeline:
- follow_up_at column on suppliers (migration 0025)
- HTMX date picker on outreach rows, POST /admin/outreach/<id>/follow-up
- Amber due-today banner on /admin/outreach with ?follow_up=due filter
- get_follow_up_due_count() for dashboard widget
- Activity timeline on /admin/suppliers/<id>: merges sent + received emails by contact_email

Feature B — pSEO article noindex:
- noindex column on articles (migration 0025)
- NOINDEX_THRESHOLDS per-template lambdas in content/__init__.py
- generate_articles() evaluates threshold and stores noindex=1 for thin-data articles
- <meta name="robots" content="noindex, follow"> in article_detail.html
- Sitemap excludes noindex articles (AND noindex = 0)
- pSEO dashboard noindex count card + article row badge

Tests: 49 new tests (29 outreach, 20 noindex), 1377 total, 0 failures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-25 17:51:38 +01:00
16 changed files with 605 additions and 14 deletions

View File

@@ -80,6 +80,9 @@ async def pseo_dashboard():
total_published = sum(r["stats"]["published"] for r in template_rows)
stale_count = sum(1 for f in freshness if f["status"] == "stale")
noindex_row = await fetch_one("SELECT COUNT(*) as cnt FROM articles WHERE noindex = 1")
noindex_count = noindex_row["cnt"] if noindex_row else 0
# Recent generation jobs — enough for the dashboard summary.
jobs = await fetch_all(
"SELECT id, task_name, status, progress_current, progress_total,"
@@ -95,6 +98,7 @@ async def pseo_dashboard():
total_published=total_published,
total_templates=len(templates),
stale_count=stale_count,
noindex_count=noindex_count,
jobs=jobs,
admin_page="pseo",
)

View File

@@ -1021,6 +1021,30 @@ async def supplier_detail(supplier_id: int):
)
enquiry_count = enquiry_row["cnt"] if enquiry_row else 0
# Email activity timeline — correlate by contact_email (no FK)
timeline = []
contact_email = supplier["contact_email"] if supplier else None
if contact_email:
sent = await fetch_all(
"""SELECT created_at, subject, 'sent' AS direction
FROM email_log
WHERE to_addr = ? AND email_type = 'outreach'
ORDER BY created_at DESC LIMIT 50""",
(contact_email,),
)
received = await fetch_all(
"""SELECT received_at AS created_at, subject, 'received' AS direction
FROM inbound_emails
WHERE from_addr = ?
ORDER BY received_at DESC LIMIT 50""",
(contact_email,),
)
timeline = sorted(
list(sent) + list(received),
key=lambda x: x["created_at"] or "",
reverse=True,
)[:50]
return await render_template(
"admin/supplier_detail.html",
supplier=supplier,
@@ -1030,6 +1054,7 @@ async def supplier_detail(supplier_id: int):
boosts=boosts,
forwards=forwards,
enquiry_count=enquiry_count,
timeline=timeline,
)
@@ -2637,6 +2662,15 @@ _CSV_OPTIONAL = {"country_code", "category", "website"}
_CSV_IMPORT_LIMIT = 500 # guard against huge uploads
async def get_follow_up_due_count() -> int:
    """Return the number of pipeline suppliers whose follow-up is due.

    A supplier is counted when it has an outreach_status and its
    follow_up_at is on or before SQLite's date('now'). Suppliers with no
    follow_up_at never match, because a comparison against NULL is not true.
    """
    due_query = (
        "SELECT COUNT(*) as cnt FROM suppliers\n"
        "WHERE outreach_status IS NOT NULL AND follow_up_at <= date('now')"
    )
    due = await fetch_one(due_query)
    # Fall back to zero when the helper hands back no row at all.
    if not due:
        return 0
    return due["cnt"]
async def get_outreach_pipeline() -> dict:
"""Count suppliers per outreach status for the pipeline summary cards."""
rows = await fetch_all(
@@ -2656,6 +2690,7 @@ async def get_outreach_suppliers(
status: str = None,
country: str = None,
search: str = None,
follow_up: str = None,
page: int = 1,
per_page: int = 50,
) -> list[dict]:
@@ -2672,6 +2707,10 @@ async def get_outreach_suppliers(
if search:
wheres.append("(name LIKE ? OR contact_email LIKE ?)")
params.extend([f"%{search}%", f"%{search}%"])
if follow_up == "due":
wheres.append("follow_up_at <= date('now')")
elif follow_up == "set":
wheres.append("follow_up_at IS NOT NULL")
where = " AND ".join(wheres)
offset = (page - 1) * per_page
@@ -2680,7 +2719,7 @@ async def get_outreach_suppliers(
return await fetch_all(
f"""SELECT id, name, country_code, category, contact_email,
outreach_status, outreach_notes, last_contacted_at,
outreach_sequence_step
outreach_sequence_step, follow_up_at
FROM suppliers
WHERE {where}
ORDER BY
@@ -2704,12 +2743,14 @@ async def outreach():
status = request.args.get("status", "")
country = request.args.get("country", "")
search = request.args.get("search", "").strip()
follow_up = request.args.get("follow_up", "")
page = max(1, int(request.args.get("page", "1") or "1"))
pipeline = await get_outreach_pipeline()
follow_up_due = await get_follow_up_due_count()
supplier_list = await get_outreach_suppliers(
status=status or None, country=country or None,
search=search or None, page=page,
search=search or None, follow_up=follow_up or None, page=page,
)
countries = await fetch_all(
"""SELECT DISTINCT country_code FROM suppliers
@@ -2720,12 +2761,14 @@ async def outreach():
return await render_template(
"admin/outreach.html",
pipeline=pipeline,
follow_up_due=follow_up_due,
suppliers=supplier_list,
statuses=OUTREACH_STATUSES,
countries=[c["country_code"] for c in countries],
current_status=status,
current_country=country,
current_search=search,
current_follow_up=follow_up,
page=page,
)
@@ -2737,11 +2780,12 @@ async def outreach_results():
status = request.args.get("status", "")
country = request.args.get("country", "")
search = request.args.get("search", "").strip()
follow_up = request.args.get("follow_up", "")
page = max(1, int(request.args.get("page", "1") or "1"))
supplier_list = await get_outreach_suppliers(
status=status or None, country=country or None,
search=search or None, page=page,
search=search or None, follow_up=follow_up or None, page=page,
)
return await render_template(
"admin/partials/outreach_results.html", suppliers=supplier_list,
@@ -2797,6 +2841,33 @@ async def outreach_note(supplier_id: int):
return note[:80] + ("" if len(note) > 80 else "") if note else ""
@bp.route("/outreach/<int:supplier_id>/follow-up", methods=["POST"])
@role_required("admin")
@csrf_protect
async def outreach_follow_up(supplier_id: int):
    """HTMX: set or clear the follow_up_at date for a supplier, return the updated row.

    Reads ``follow_up_at`` from the posted form. An empty value clears the
    date (stores NULL). Any other value must parse as an ISO calendar date
    and is stored normalised as YYYY-MM-DD.

    Args:
        supplier_id: Primary key of the supplier row to update.

    Returns:
        The rendered ``admin/partials/outreach_row.html`` partial for the
        updated supplier; a 404 response when no supplier with that id and a
        non-NULL outreach_status exists; a 400 response when the submitted
        date cannot be parsed.
    """
    # Function-scope import so the handler does not rely on a module-level
    # import that may not exist in this file.
    from datetime import date

    supplier = await fetch_one(
        "SELECT * FROM suppliers WHERE id = ? AND outreach_status IS NOT NULL",
        (supplier_id,),
    )
    if not supplier:
        return Response("Not found", status=404)

    form = await request.form
    follow_up_at_raw = form.get("follow_up_at", "").strip()

    # Empty input clears the follow-up. Anything else is validated before it
    # is stored: the value is later compared with date('now') in SQL and
    # echoed into an <input type="date">, so arbitrary text must not get in.
    follow_up_at = None
    if follow_up_at_raw:
        try:
            follow_up_at = date.fromisoformat(follow_up_at_raw).isoformat()
        except ValueError:
            return Response("Invalid follow-up date", status=400)

    await execute(
        "UPDATE suppliers SET follow_up_at = ? WHERE id = ?",
        (follow_up_at, supplier_id),
    )

    # Re-read the row so the partial reflects exactly what was persisted.
    updated = await fetch_one("SELECT * FROM suppliers WHERE id = ?", (supplier_id,))
    return await render_template("admin/partials/outreach_row.html", s=updated)
@bp.route("/outreach/add-prospects", methods=["POST"])
@role_required("admin")
@csrf_protect

View File

@@ -37,6 +37,17 @@
{% endfor %}
</div>
<!-- Follow-up banner -->
{% if follow_up_due > 0 %}
<div style="background:#FEF3C7;border:1px solid #F59E0B;border-radius:6px;padding:0.75rem 1rem;margin-bottom:1rem;display:flex;align-items:center;justify-content:space-between">
<span style="color:#92400E;font-size:0.875rem">
&#x23F0; <strong>{{ follow_up_due }}</strong> follow-up{{ 's' if follow_up_due != 1 else '' }} due today
</span>
<a href="{{ url_for('admin.outreach') }}?follow_up=due"
class="btn-outline btn-sm" style="font-size:0.75rem;padding:3px 10px">Show them</a>
</div>
{% endif %}
<!-- Filters -->
<div class="card mb-4" style="padding:1rem 1.25rem;">
<form class="flex flex-wrap gap-3 items-end"
@@ -45,6 +56,7 @@
hx-trigger="change, input delay:300ms"
hx-indicator="#outreach-loading">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
{% if current_follow_up %}<input type="hidden" name="follow_up" value="{{ current_follow_up }}">{% endif %}
<div>
<label class="text-xs font-semibold text-slate block mb-1">Status</label>

View File

@@ -9,6 +9,7 @@
{% else %}
<span class="badge">Draft</span>
{% endif %}
{% if a.noindex %}<span class="badge" style="background:#FEF3C7;color:#92400E;font-size:0.6rem">noindex</span>{% endif %}
</td>
<td class="mono">{{ a.published_at[:10] if a.published_at else '-' }}</td>
<td>{{ a.language | upper if a.language else '-' }}</td>

View File

@@ -8,6 +8,7 @@
<th>Status</th>
<th>Step</th>
<th>Last Contact</th>
<th>Follow-up</th>
<th>Notes</th>
<th>Actions</th>
</tr>

View File

@@ -35,6 +35,20 @@
{% endif %}
</td>
<td>
{# Follow-up date picker — submits on change, row swaps via HTMX #}
<form hx-post="{{ url_for('admin.outreach_follow_up', supplier_id=s.id) }}"
hx-target="#outreach-row-{{ s.id }}"
hx-swap="outerHTML"
class="m-0">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<input type="date" name="follow_up_at"
value="{{ s.follow_up_at or '' }}"
class="form-input"
style="font-size:0.75rem;padding:2px 6px"
onchange="this.form.requestSubmit()">
</form>
</td>
<td style="max-width:160px">
{# Inline note edit #}
<form hx-post="{{ url_for('admin.outreach_note', supplier_id=s.id) }}"

View File

@@ -50,9 +50,9 @@
<p class="text-xs text-slate mt-1">data newer than articles</p>
</div>
<div class="card text-center">
<p class="card-header">Health Checks</p>
<p class="text-3xl font-bold text-navy"></p>
<p class="text-xs text-slate mt-1">see Health section below</p>
<p class="card-header">Noindex</p>
<p class="text-3xl font-bold {% if noindex_count > 0 %}text-amber-600{% else %}text-navy{% endif %}">{{ noindex_count }}</p>
<p class="text-xs text-slate mt-1">thin-data articles</p>
</div>
</div>

View File

@@ -143,6 +143,34 @@
</div>
{% endif %}
<!-- Email activity timeline -->
{% if supplier.contact_email %}
<div class="card mb-4" style="padding:1.5rem">
<h2 class="text-lg mb-3">Activity Timeline</h2>
{% if timeline %}
<div style="display:flex;flex-direction:column;gap:0.5rem">
{% for entry in timeline %}
<div style="display:flex;gap:0.75rem;align-items:baseline;font-size:0.8125rem">
<span style="flex-shrink:0;width:1.5rem;text-align:center;color:{% if entry.direction == 'sent' %}#2563EB{% else %}#059669{% endif %}">
{% if entry.direction == 'sent' %}&#x2190;{% else %}&#x2192;{% endif %}
</span>
<span class="mono text-xs text-slate" style="flex-shrink:0;width:6.5rem">
{{ entry.created_at[:10] if entry.created_at else '—' }}
</span>
<span style="overflow:hidden;text-overflow:ellipsis;white-space:nowrap;flex:1"
title="{{ entry.subject or '' }}">
{{ (entry.subject or '(no subject)')[:80] }}
</span>
<span class="text-xs text-slate" style="flex-shrink:0">{{ entry.direction }}</span>
</div>
{% endfor %}
</div>
{% else %}
<p class="text-sm text-slate">No email history yet.</p>
{% endif %}
</div>
{% endif %}
<!-- Active boosts -->
<div class="card" style="padding:1.5rem">
<h2 class="text-lg mb-3">Active Boosts</h2>

View File

@@ -26,6 +26,14 @@ logger = logging.getLogger(__name__)
TEMPLATES_DIR = Path(__file__).parent / "templates"
BUILD_DIR = Path("data/content/_build")
def _below_minimum(field: str, minimum: float):
    """Build a predicate that is True when ``row[field]`` is under *minimum*.

    A missing, None, or otherwise falsy value is treated as 0.
    """
    def _is_thin(row) -> bool:
        return (row.get(field) or 0) < minimum

    return _is_thin


# Per-template noindex rules, keyed by template slug.
# Each value takes the data row and answers True when the article lacks
# enough data to be worth indexing (→ it should be marked noindex).
NOINDEX_THRESHOLDS: dict = {
    "city-pricing": _below_minimum("venue_count", 3),
    "city-cost-de": _below_minimum("data_confidence", 1.0),
    "country-overview": _below_minimum("total_venues", 5),
}
_REQUIRED_FRONTMATTER = {
"name", "slug", "content_type", "data_table",
"natural_key", "languages", "url_pattern", "title_pattern",
@@ -499,25 +507,31 @@ async def generate_articles(
md_dir.mkdir(parents=True, exist_ok=True)
(md_dir / f"{article_slug}.md").write_text(body_md)
# Evaluate noindex threshold for this template + data row.
_threshold = NOINDEX_THRESHOLDS.get(slug)
should_noindex = 1 if _threshold and _threshold(row) else 0
# Upsert article — keyed by (url_path, language).
# Single statement: no SELECT round-trip, no per-row commit.
await db.execute(
"""INSERT INTO articles
(url_path, slug, title, meta_description, country, region,
status, published_at, template_slug, language, date_modified,
seo_head, created_at)
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)
seo_head, noindex, created_at)
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(url_path, language) DO UPDATE SET
title = excluded.title,
meta_description = excluded.meta_description,
template_slug = excluded.template_slug,
date_modified = excluded.date_modified,
seo_head = excluded.seo_head,
noindex = excluded.noindex,
updated_at = excluded.date_modified""",
(
url_path, article_slug, title, meta_desc,
row.get("country", ""), row.get("region", ""),
publish_dt, slug, lang, now_iso, seo_head, now_iso,
publish_dt, slug, lang, now_iso, seo_head,
should_noindex, now_iso,
),
)

View File

@@ -3,6 +3,7 @@
{% block title %}{{ article.title }} - {{ config.APP_NAME }}{% endblock %}
{% block head %}
{% if article.noindex %}<meta name="robots" content="noindex, follow">{% endif %}
<meta name="description" content="{{ article.meta_description or '' }}">
{% if article.og_image_url %}
<meta property="og:image" content="{{ article.og_image_url }}">

View File

@@ -0,0 +1,15 @@
"""Migration 0025: Add follow_up_at to suppliers + noindex to articles.
follow_up_at: ISO date string (YYYY-MM-DD) for scheduled follow-up reminders.
NULL = no follow-up scheduled.
noindex: 1 = search engines should not index this article (thin/insufficient data).
0 = indexable (default). Set at generation time by NOINDEX_THRESHOLDS.
"""
def up(conn) -> None:
    """Apply migration 0025 by adding one column to each of two tables.

    Args:
        conn: Database connection exposing ``execute(sql)``.
    """
    ddl_statements = (
        # Scheduled follow-up date for outreach pipeline suppliers.
        "ALTER TABLE suppliers ADD COLUMN follow_up_at TEXT DEFAULT NULL",
        # Flag that keeps articles with insufficient data out of search indexes.
        "ALTER TABLE articles ADD COLUMN noindex INTEGER NOT NULL DEFAULT 0",
    )
    for ddl in ddl_statements:
        conn.execute(ddl)

View File

@@ -68,11 +68,12 @@ async def _generate_sitemap_xml(base_url: str) -> str:
# Billing pricing — no lang prefix, no hreflang
entries.append(_url_entry(f"{base}/billing/pricing", []))
# Published articles — both lang variants with accurate lastmod
# Published articles — both lang variants with accurate lastmod.
# Exclude noindex articles (thin data) to keep sitemap signal-dense.
articles = await fetch_all(
"""SELECT url_path, COALESCE(updated_at, published_at) AS lastmod
FROM articles
WHERE status = 'published' AND published_at <= datetime('now')
WHERE status = 'published' AND noindex = 0 AND published_at <= datetime('now')
ORDER BY published_at DESC
LIMIT 25000"""
)