Merge branch 'worktree-sitemap-improvement'
# Conflicts: # web/tests/conftest.py
This commit is contained in:
@@ -251,63 +251,10 @@ def create_app() -> Quart:
|
||||
)
|
||||
return Response(body, content_type="text/plain")
|
||||
|
||||
# sitemap.xml must live at root
|
||||
@app.route("/sitemap.xml")
async def sitemap():
    """Serve the XML sitemap: static pages, published articles and suppliers.

    Returns:
        A ``Response`` with content type ``application/xml`` whose body is a
        ``<urlset>`` holding one ``<url>`` (``<loc>`` + ``<lastmod>``) per page.

    Article paths and supplier slugs are read from the database, so every
    value is XML-escaped before it is written; a raw ``&`` or ``<`` would
    otherwise make the whole document unparseable.
    """
    from datetime import UTC, datetime
    from xml.sax.saxutils import escape

    from .core import fetch_all

    base = config.BASE_URL.rstrip("/")
    # Fallback date for pages that have no modification date of their own.
    today = datetime.now(UTC).strftime("%Y-%m-%d")
    langs = ("en", "de")

    # Both language variants of all SEO pages
    static_paths = [
        "",  # landing
        "/features",
        "/about",
        "/terms",
        "/privacy",
        "/imprint",
        "/suppliers",
        "/markets",
    ]
    entries: list[tuple[str, str]] = [
        (f"{base}/{lang}{path}", today) for path in static_paths for lang in langs
    ]

    # Planner + directory lang variants, billing (no lang)
    for lang in langs:
        entries.append((f"{base}/{lang}/planner/", today))
        entries.append((f"{base}/{lang}/directory/", today))
    entries.append((f"{base}/billing/pricing", today))

    # Published articles — both lang variants
    articles = await fetch_all(
        """SELECT url_path, COALESCE(updated_at, published_at) as lastmod
        FROM articles
        WHERE status = 'published' AND published_at <= datetime('now')
        ORDER BY published_at DESC"""
    )
    for article in articles:
        # Keep only the leading YYYY-MM-DD part of the stored timestamp.
        lastmod = article["lastmod"][:10] if article["lastmod"] else today
        for lang in langs:
            entries.append((f"{base}/{lang}{article['url_path']}", lastmod))

    # Supplier detail pages (English only — canonical)
    suppliers = await fetch_all(
        "SELECT slug, created_at FROM suppliers ORDER BY name LIMIT 5000"
    )
    for supplier in suppliers:
        lastmod = supplier["created_at"][:10] if supplier["created_at"] else today
        entries.append((f"{base}/en/directory/{supplier['slug']}", lastmod))

    # Assemble with a single join instead of repeated string concatenation.
    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    lines.extend(
        f" <url><loc>{escape(loc)}</loc><lastmod>{escape(lastmod)}</lastmod></url>"
        for loc, lastmod in entries
    )
    lines.append("</urlset>")
    xml = "\n".join(lines)
    return Response(xml, content_type="application/xml")
|
||||
from .sitemap import sitemap_response
|
||||
return await sitemap_response(config.BASE_URL)
|
||||
|
||||
# Health check
|
||||
@app.route("/health")
|
||||
|
||||
117
web/src/padelnomics/sitemap.py
Normal file
117
web/src/padelnomics/sitemap.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""Sitemap generation with in-memory TTL cache and hreflang alternates."""
|
||||
|
||||
import time
from xml.sax.saxutils import escape

from quart import Response

from .core import fetch_all
|
||||
|
||||
# In-memory cache state: written by sitemap_response(), reset by the test suite.
_cache_xml: str = ""  # last rendered document; empty string means "not cached"
_cache_timestamp: float = 0.0  # time.monotonic() reading stored with the cache
CACHE_TTL_SECONDS: int = 60 * 60  # 1 hour

# Language prefixes every localized URL is emitted under.
LANGS = ("en", "de")
# Language variant that the "x-default" hreflang alternate points to.
DEFAULT_LANG = "en"

# Language-prefixed pages that are listed without a <lastmod> value.
STATIC_PATHS = [
    "",  # landing page
    "/features",
    "/about",
    "/terms",
    "/privacy",
    "/imprint",
    "/suppliers",
    "/markets",
    "/planner/",
    "/directory/",
]
|
||||
|
||||
|
||||
def _url_entry(loc: str, alternates: list[tuple[str, str]], lastmod: str | None = None) -> str:
|
||||
"""Build a single <url> entry with optional hreflang alternates and lastmod."""
|
||||
parts = [f" <url>\n <loc>{loc}</loc>"]
|
||||
for hreflang, href in alternates:
|
||||
parts.append(
|
||||
f' <xhtml:link rel="alternate" hreflang="{hreflang}" href="{href}"/>'
|
||||
)
|
||||
if lastmod:
|
||||
parts.append(f" <lastmod>{lastmod}</lastmod>")
|
||||
parts.append(" </url>")
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _lang_alternates(base: str, path: str) -> list[tuple[str, str]]:
    """Return the hreflang ``(code, url)`` pairs for a language-prefixed path.

    The list holds one pair per entry of ``LANGS`` (in that order), followed
    by an ``x-default`` pair whose URL is the ``DEFAULT_LANG`` variant.
    """
    per_language = [(code, f"{base}/{code}{path}") for code in LANGS]
    fallback = ("x-default", f"{base}/{DEFAULT_LANG}{path}")
    return [*per_language, fallback]
|
||||
|
||||
|
||||
async def _generate_sitemap_xml(base_url: str) -> str:
    """Render the complete sitemap document as a string.

    Entries are emitted in this order: the ``STATIC_PATHS`` pages, the
    un-prefixed billing pricing page, published articles, then supplier
    detail pages. Everything except the pricing page is listed once per
    language in ``LANGS`` and carries the full hreflang alternate set.

    Args:
        base_url: Site origin; a trailing slash is stripped before use.

    Returns:
        The XML declaration plus a ``<urlset>`` element wrapping all entries.
    """
    # NOTE(review): sitemaps.org caps one sitemap file at 50,000 URLs. The
    # limits below permit 20 static + 1 pricing + 2 * 25,000 article +
    # 2 * 5,000 supplier URLs, i.e. up to 60,021 — confirm whether a sitemap
    # index or tighter limits are needed.
    root = base_url.rstrip("/")
    url_nodes: list[str] = []

    def add_localized(path: str, lastmod: str | None = None) -> None:
        """Append one entry per language for *path*, all sharing one alternate set."""
        hreflang_links = _lang_alternates(root, path)
        url_nodes.extend(
            _url_entry(f"{root}/{lang}{path}", hreflang_links, lastmod) for lang in LANGS
        )

    # Static pages: listed without lastmod.
    for static_path in STATIC_PATHS:
        add_localized(static_path)

    # Pricing lives outside the language prefix, so it gets no alternates.
    url_nodes.append(_url_entry(f"{root}/billing/pricing", []))

    # Published articles, dated by last update (falling back to publication).
    article_rows = await fetch_all(
        """SELECT url_path, COALESCE(updated_at, published_at) AS lastmod
        FROM articles
        WHERE status = 'published' AND published_at <= datetime('now')
        ORDER BY published_at DESC
        LIMIT 25000"""
    )
    for row in article_rows:
        stamp = row["lastmod"]
        # Only the first ten characters of the stored value are kept.
        day = stamp[:10] if stamp else None
        add_localized(row["url_path"], day)

    # Supplier detail pages, dated by creation time.
    supplier_rows = await fetch_all(
        "SELECT slug, created_at FROM suppliers ORDER BY name LIMIT 5000"
    )
    for row in supplier_rows:
        created = row["created_at"]
        day = created[:10] if created else None
        add_localized(f"/directory/{row['slug']}", day)

    prologue = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n'
        ' xmlns:xhtml="http://www.w3.org/1999/xhtml">\n'
    )
    return prologue + "\n".join(url_nodes) + "\n</urlset>"
|
||||
|
||||
|
||||
async def sitemap_response(base_url: str) -> Response:
    """Serve the sitemap from the module-level cache, rebuilding it when needed.

    The document is regenerated when nothing is cached yet or when the cached
    copy is older than ``CACHE_TTL_SECONDS``. The cache is a single string and
    is not keyed by ``base_url``.

    Args:
        base_url: Site origin, forwarded to the generator on a rebuild.

    Returns:
        An ``application/xml`` response whose ``Cache-Control`` max-age
        matches the in-process TTL.
    """
    global _cache_xml, _cache_timestamp  # noqa: PLW0603

    checked_at = time.monotonic()
    cache_age = checked_at - _cache_timestamp
    if cache_age > CACHE_TTL_SECONDS or not _cache_xml:
        _cache_xml = await _generate_sitemap_xml(base_url)
        # Stamped with the reading taken before generation started.
        _cache_timestamp = checked_at

    cache_header = {"Cache-Control": f"public, max-age={CACHE_TTL_SECONDS}"}
    return Response(_cache_xml, content_type="application/xml", headers=cache_header)
|
||||
@@ -16,6 +16,7 @@ from padelnomics.app import create_app
|
||||
from padelnomics.migrations.migrate import migrate
|
||||
|
||||
from padelnomics import core
|
||||
from padelnomics import sitemap as sitemap_mod
|
||||
|
||||
_SCHEMA_CACHE = None
|
||||
|
||||
@@ -57,6 +58,9 @@ async def db():
|
||||
|
||||
original_db = core._db
|
||||
core._db = conn
|
||||
# Clear sitemap cache so tests see fresh DB state
|
||||
sitemap_mod._cache_xml = ""
|
||||
sitemap_mod._cache_timestamp = 0.0
|
||||
|
||||
yield conn
|
||||
|
||||
|
||||
Reference in New Issue
Block a user