Merge branch 'worktree-sitemap-improvement'

# Conflicts:
#	web/tests/conftest.py
This commit is contained in:
Deeman
2026-02-23 00:39:38 +01:00
4 changed files with 129 additions and 55 deletions

View File

@@ -251,63 +251,10 @@ def create_app() -> Quart:
)
return Response(body, content_type="text/plain")
# sitemap.xml must live at root
@app.route("/sitemap.xml")
async def sitemap():
from datetime import UTC, datetime
from .core import fetch_all
base = config.BASE_URL.rstrip("/")
today = datetime.now(UTC).strftime("%Y-%m-%d")
# Both language variants of all SEO pages
static_paths = [
"", # landing
"/features",
"/about",
"/terms",
"/privacy",
"/imprint",
"/suppliers",
"/markets",
]
entries: list[tuple[str, str]] = []
for path in static_paths:
for lang in ("en", "de"):
entries.append((f"{base}/{lang}{path}", today))
# Planner + directory lang variants, billing (no lang)
for lang in ("en", "de"):
entries.append((f"{base}/{lang}/planner/", today))
entries.append((f"{base}/{lang}/directory/", today))
entries.append((f"{base}/billing/pricing", today))
# Published articles — both lang variants
articles = await fetch_all(
"""SELECT url_path, COALESCE(updated_at, published_at) as lastmod
FROM articles
WHERE status = 'published' AND published_at <= datetime('now')
ORDER BY published_at DESC"""
)
for article in articles:
lastmod = article["lastmod"][:10] if article["lastmod"] else today
for lang in ("en", "de"):
entries.append((f"{base}/{lang}{article['url_path']}", lastmod))
# Supplier detail pages (English only — canonical)
suppliers = await fetch_all(
"SELECT slug, created_at FROM suppliers ORDER BY name LIMIT 5000"
)
for supplier in suppliers:
lastmod = supplier["created_at"][:10] if supplier["created_at"] else today
entries.append((f"{base}/en/directory/{supplier['slug']}", lastmod))
xml = '<?xml version="1.0" encoding="UTF-8"?>\n'
xml += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
for loc, lastmod in entries:
xml += f" <url><loc>{loc}</loc><lastmod>{lastmod}</lastmod></url>\n"
xml += "</urlset>"
return Response(xml, content_type="application/xml")
from .sitemap import sitemap_response
return await sitemap_response(config.BASE_URL)
# Health check
@app.route("/health")

View File

@@ -0,0 +1,117 @@
"""Sitemap generation with in-memory TTL cache and hreflang alternates."""
import time
from xml.sax.saxutils import escape

from quart import Response

from .core import fetch_all
# In-process cache of the rendered sitemap document. An empty string means
# nothing has been generated yet; the timestamp is compared against
# time.monotonic() readings when deciding whether to regenerate.
_cache_timestamp: float = 0.0
_cache_xml: str = ""
CACHE_TTL_SECONDS: int = 60 * 60  # 1 hour

# Language prefixes used in URLs. DEFAULT_LANG is also the x-default target.
DEFAULT_LANG = "en"
LANGS = (DEFAULT_LANG, "de")

# Language-prefixed pages that are listed without a <lastmod> value.
STATIC_PATHS = [
    "",  # landing
    "/features",
    "/about",
    "/terms",
    "/privacy",
    "/imprint",
    "/suppliers",
    "/markets",
    "/planner/",
    "/directory/",
]
def _url_entry(loc: str, alternates: list[tuple[str, str]], lastmod: str | None = None) -> str:
"""Build a single <url> entry with optional hreflang alternates and lastmod."""
parts = [f" <url>\n <loc>{loc}</loc>"]
for hreflang, href in alternates:
parts.append(
f' <xhtml:link rel="alternate" hreflang="{hreflang}" href="{href}"/>'
)
if lastmod:
parts.append(f" <lastmod>{lastmod}</lastmod>")
parts.append(" </url>")
return "\n".join(parts)
def _lang_alternates(base: str, path: str) -> list[tuple[str, str]]:
    """Return the hreflang alternates for a language-prefixed path.

    One ``(lang, url)`` pair is produced for each entry in ``LANGS``, in
    order, followed by an ``x-default`` pair that points at the
    ``DEFAULT_LANG`` variant of the same path.
    """
    per_language = [(code, f"{base}/{code}{path}") for code in LANGS]
    fallback = ("x-default", f"{base}/{DEFAULT_LANG}{path}")
    return [*per_language, fallback]
async def _generate_sitemap_xml(base_url: str) -> str:
    """Render the full sitemap document for the site rooted at *base_url*.

    Entries appear in this order: static pages, the billing pricing page,
    published articles, then supplier detail pages. Every language-prefixed
    page is listed once per language in ``LANGS`` and each of those entries
    carries the same set of hreflang alternates.

    NOTE(review): 25000 article rows x 2 languages already reaches 50,000
    entries before suppliers and static pages are added; the sitemaps
    protocol caps a single file at 50,000 URLs -- confirm whether a sitemap
    index is needed.
    """
    root = base_url.rstrip("/")
    url_nodes: list[str] = []

    def add_localized(path: str, lastmod: str | None = None) -> None:
        # One <url> per language, all sharing the same alternates list.
        links = _lang_alternates(root, path)
        url_nodes.extend(
            _url_entry(f"{root}/{code}{path}", links, lastmod) for code in LANGS
        )

    # Static pages are listed without a lastmod value.
    for static_path in STATIC_PATHS:
        add_localized(static_path)

    # The pricing page has no language prefix, hence no alternates.
    url_nodes.append(_url_entry(f"{root}/billing/pricing", []))

    # Published articles; lastmod is the first 10 characters of the stored
    # timestamp (updated_at, falling back to published_at).
    article_rows = await fetch_all(
        "SELECT url_path, COALESCE(updated_at, published_at) AS lastmod\n"
        "FROM articles\n"
        "WHERE status = 'published' AND published_at <= datetime('now')\n"
        "ORDER BY published_at DESC\n"
        "LIMIT 25000"
    )
    for row in article_rows:
        stamp = row["lastmod"]
        day = stamp[:10] if stamp else None
        add_localized(row["url_path"], day)

    # Supplier detail pages live under /directory/<slug>.
    supplier_rows = await fetch_all(
        "SELECT slug, created_at FROM suppliers ORDER BY name LIMIT 5000"
    )
    for row in supplier_rows:
        created = row["created_at"]
        day = created[:10] if created else None
        add_localized(f"/directory/{row['slug']}", day)

    prolog = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n'
        ' xmlns:xhtml="http://www.w3.org/1999/xhtml">\n'
    )
    return prolog + "\n".join(url_nodes) + "\n</urlset>"
async def sitemap_response(base_url: str) -> Response:
    """Serve the sitemap XML from the module-level cache.

    The document is regenerated when nothing has been cached yet or when the
    cached copy is older than ``CACHE_TTL_SECONDS``. The response advertises
    the same lifetime to clients through ``Cache-Control``.
    """
    global _cache_xml, _cache_timestamp  # noqa: PLW0603

    checked_at = time.monotonic()
    expired = (checked_at - _cache_timestamp) > CACHE_TTL_SECONDS
    if expired or not _cache_xml:
        # The timestamp is the reading taken before generation started.
        _cache_xml = await _generate_sitemap_xml(base_url)
        _cache_timestamp = checked_at

    cache_headers = {"Cache-Control": f"public, max-age={CACHE_TTL_SECONDS}"}
    return Response(
        _cache_xml,
        content_type="application/xml",
        headers=cache_headers,
    )