feat: sitemap hreflang alternates, caching, and lastmod cleanup
Extract sitemap generation to sitemap.py with xhtml:link hreflang alternates (en/de/x-default) on every URL entry. Add 1-hour in-memory TTL cache with Cache-Control header. Include supplier pages in both languages (were EN-only). Drop misleading "today" lastmod from static pages. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||||||
- Supervisor: added daily sleep interval between pipeline runs
|
- Supervisor: added daily sleep interval between pipeline runs
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
- **Sitemap: hreflang alternates + caching** — extracted sitemap generation to
|
||||||
|
`sitemap.py`; each URL entry now includes `xhtml:link` hreflang alternates
|
||||||
|
(en, de, x-default) for correct international SEO signaling; supplier detail
|
||||||
|
pages now listed in both EN and DE (were EN-only); removed misleading "today"
|
||||||
|
lastmod from static pages; added 1-hour in-memory TTL cache with
|
||||||
|
`Cache-Control: public, max-age=3600` response header
|
||||||
- **Playtomic availability extractor** (`playtomic_availability.py`) — daily next-day booking
|
- **Playtomic availability extractor** (`playtomic_availability.py`) — daily next-day booking
|
||||||
slot snapshots for occupancy rate estimation and pricing benchmarking; reads tenant IDs from
|
slot snapshots for occupancy rate estimation and pricing benchmarking; reads tenant IDs from
|
||||||
latest `tenants.json.gz`, queries `/v1/availability` per venue with 2s throttle, resumable
|
latest `tenants.json.gz`, queries `/v1/availability` per venue with 2s throttle, resumable
|
||||||
|
|||||||
@@ -251,63 +251,10 @@ def create_app() -> Quart:
|
|||||||
)
|
)
|
||||||
return Response(body, content_type="text/plain")
|
return Response(body, content_type="text/plain")
|
||||||
|
|
||||||
# sitemap.xml must live at root
|
|
||||||
@app.route("/sitemap.xml")
|
@app.route("/sitemap.xml")
|
||||||
async def sitemap():
|
async def sitemap():
|
||||||
from datetime import UTC, datetime
|
from .sitemap import sitemap_response
|
||||||
|
return await sitemap_response(config.BASE_URL)
|
||||||
from .core import fetch_all
|
|
||||||
base = config.BASE_URL.rstrip("/")
|
|
||||||
today = datetime.now(UTC).strftime("%Y-%m-%d")
|
|
||||||
|
|
||||||
# Both language variants of all SEO pages
|
|
||||||
static_paths = [
|
|
||||||
"", # landing
|
|
||||||
"/features",
|
|
||||||
"/about",
|
|
||||||
"/terms",
|
|
||||||
"/privacy",
|
|
||||||
"/imprint",
|
|
||||||
"/suppliers",
|
|
||||||
"/markets",
|
|
||||||
]
|
|
||||||
entries: list[tuple[str, str]] = []
|
|
||||||
for path in static_paths:
|
|
||||||
for lang in ("en", "de"):
|
|
||||||
entries.append((f"{base}/{lang}{path}", today))
|
|
||||||
|
|
||||||
# Planner + directory lang variants, billing (no lang)
|
|
||||||
for lang in ("en", "de"):
|
|
||||||
entries.append((f"{base}/{lang}/planner/", today))
|
|
||||||
entries.append((f"{base}/{lang}/directory/", today))
|
|
||||||
entries.append((f"{base}/billing/pricing", today))
|
|
||||||
|
|
||||||
# Published articles — both lang variants
|
|
||||||
articles = await fetch_all(
|
|
||||||
"""SELECT url_path, COALESCE(updated_at, published_at) as lastmod
|
|
||||||
FROM articles
|
|
||||||
WHERE status = 'published' AND published_at <= datetime('now')
|
|
||||||
ORDER BY published_at DESC"""
|
|
||||||
)
|
|
||||||
for article in articles:
|
|
||||||
lastmod = article["lastmod"][:10] if article["lastmod"] else today
|
|
||||||
for lang in ("en", "de"):
|
|
||||||
entries.append((f"{base}/{lang}{article['url_path']}", lastmod))
|
|
||||||
|
|
||||||
# Supplier detail pages (English only — canonical)
|
|
||||||
suppliers = await fetch_all(
|
|
||||||
"SELECT slug, created_at FROM suppliers ORDER BY name LIMIT 5000"
|
|
||||||
)
|
|
||||||
for supplier in suppliers:
|
|
||||||
lastmod = supplier["created_at"][:10] if supplier["created_at"] else today
|
|
||||||
entries.append((f"{base}/en/directory/{supplier['slug']}", lastmod))
|
|
||||||
|
|
||||||
xml = '<?xml version="1.0" encoding="UTF-8"?>\n'
|
|
||||||
xml += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
|
|
||||||
for loc, lastmod in entries:
|
|
||||||
xml += f" <url><loc>{loc}</loc><lastmod>{lastmod}</lastmod></url>\n"
|
|
||||||
xml += "</urlset>"
|
|
||||||
return Response(xml, content_type="application/xml")
|
|
||||||
|
|
||||||
# Health check
|
# Health check
|
||||||
@app.route("/health")
|
@app.route("/health")
|
||||||
|
|||||||
117
web/src/padelnomics/sitemap.py
Normal file
117
web/src/padelnomics/sitemap.py
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
"""Sitemap generation with in-memory TTL cache and hreflang alternates."""
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
from quart import Response
|
||||||
|
|
||||||
|
from .core import fetch_all
|
||||||
|
|
||||||
|
# Cached sitemap XML body; an empty string means nothing has been generated
# yet and forces regeneration on the next request.
_cache_xml: str = ""
# time.monotonic() reading captured when the cached XML was last regenerated.
_cache_timestamp: float = 0.0
# Maximum cache age; also emitted as max-age in the Cache-Control header.
CACHE_TTL_SECONDS: int = 3600  # 1 hour

# Language prefixes for which every localized URL is emitted.
LANGS = ("en", "de")
# Language whose URL is advertised as the "x-default" hreflang alternate.
DEFAULT_LANG = "en"

# Pages with lang prefix but no meaningful lastmod
STATIC_PATHS = [
    "",  # landing
    "/features",
    "/about",
    "/terms",
    "/privacy",
    "/imprint",
    "/suppliers",
    "/markets",
    "/planner/",
    "/directory/",
]
|
||||||
|
|
||||||
|
|
||||||
|
def _url_entry(loc: str, alternates: list[tuple[str, str]], lastmod: str | None = None) -> str:
|
||||||
|
"""Build a single <url> entry with optional hreflang alternates and lastmod."""
|
||||||
|
parts = [f" <url>\n <loc>{loc}</loc>"]
|
||||||
|
for hreflang, href in alternates:
|
||||||
|
parts.append(
|
||||||
|
f' <xhtml:link rel="alternate" hreflang="{hreflang}" href="{href}"/>'
|
||||||
|
)
|
||||||
|
if lastmod:
|
||||||
|
parts.append(f" <lastmod>{lastmod}</lastmod>")
|
||||||
|
parts.append(" </url>")
|
||||||
|
return "\n".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _lang_alternates(base: str, path: str) -> list[tuple[str, str]]:
    """Return the hreflang alternates for a language-prefixed path.

    Produces one ``(lang, url)`` pair per entry in ``LANGS``, followed by an
    ``"x-default"`` pair pointing at the ``DEFAULT_LANG`` variant.
    """
    per_language = [(code, f"{base}/{code}{path}") for code in LANGS]
    fallback = ("x-default", f"{base}/{DEFAULT_LANG}{path}")
    return [*per_language, fallback]
|
||||||
|
|
||||||
|
|
||||||
|
async def _generate_sitemap_xml(base_url: str) -> str:
    """Assemble the full sitemap document from static paths and DB rows.

    Emits, in order: every ``STATIC_PATHS`` page in each language (no
    lastmod), the un-prefixed billing pricing page, published articles, and
    supplier detail pages. Every language-prefixed URL carries the full set of
    hreflang alternates for its path.
    """
    root = base_url.rstrip("/")
    chunks: list[str] = []

    # Static pages — both lang variants, no lastmod (rarely changes)
    for static_path in STATIC_PATHS:
        links = _lang_alternates(root, static_path)
        chunks.extend(
            _url_entry(f"{root}/{code}{static_path}", links) for code in LANGS
        )

    # Billing pricing — no lang prefix, no hreflang
    chunks.append(_url_entry(f"{root}/billing/pricing", []))

    # Published articles — both lang variants with accurate lastmod
    article_rows = await fetch_all(
        """SELECT url_path, COALESCE(updated_at, published_at) AS lastmod
        FROM articles
        WHERE status = 'published' AND published_at <= datetime('now')
        ORDER BY published_at DESC
        LIMIT 25000"""
    )
    for row in article_rows:
        # Keep only the date part of the stored timestamp.
        modified = row["lastmod"][:10] if row["lastmod"] else None
        links = _lang_alternates(root, row["url_path"])
        chunks.extend(
            _url_entry(f"{root}/{code}{row['url_path']}", links, modified)
            for code in LANGS
        )

    # Supplier detail pages — both lang variants
    supplier_rows = await fetch_all(
        "SELECT slug, created_at FROM suppliers ORDER BY name LIMIT 5000"
    )
    for row in supplier_rows:
        modified = row["created_at"][:10] if row["created_at"] else None
        detail_path = f"/directory/{row['slug']}"
        links = _lang_alternates(root, detail_path)
        chunks.extend(
            _url_entry(f"{root}/{code}{detail_path}", links, modified)
            for code in LANGS
        )

    prolog = '<?xml version="1.0" encoding="UTF-8"?>\n'
    urlset_open = (
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n'
        ' xmlns:xhtml="http://www.w3.org/1999/xhtml">\n'
    )
    return "".join((prolog, urlset_open, "\n".join(chunks), "\n</urlset>"))
|
||||||
|
|
||||||
|
|
||||||
|
async def sitemap_response(base_url: str) -> Response:
    """Serve the sitemap, rebuilding the cached XML when it is missing or stale.

    The cached body is reused until it is older than ``CACHE_TTL_SECONDS``
    (measured with ``time.monotonic()``); the same TTL is sent to clients as
    the ``Cache-Control`` max-age.
    """
    global _cache_xml, _cache_timestamp  # noqa: PLW0603

    checked_at = time.monotonic()
    cache_is_fresh = bool(_cache_xml) and (checked_at - _cache_timestamp) <= CACHE_TTL_SECONDS
    if not cache_is_fresh:
        _cache_xml = await _generate_sitemap_xml(base_url)
        # The timestamp is the reading taken before regeneration started.
        _cache_timestamp = checked_at

    cache_headers = {"Cache-Control": f"public, max-age={CACHE_TTL_SECONDS}"}
    return Response(
        _cache_xml,
        content_type="application/xml",
        headers=cache_headers,
    )
|
||||||
@@ -12,10 +12,12 @@ from unittest.mock import AsyncMock, patch
|
|||||||
|
|
||||||
import aiosqlite
|
import aiosqlite
|
||||||
import pytest
|
import pytest
|
||||||
from padelnomics import core
|
|
||||||
from padelnomics.app import create_app
|
from padelnomics.app import create_app
|
||||||
from padelnomics.migrations.migrate import migrate
|
from padelnomics.migrations.migrate import migrate
|
||||||
|
|
||||||
|
from padelnomics import core
|
||||||
|
from padelnomics import sitemap as sitemap_mod
|
||||||
|
|
||||||
_SCHEMA_CACHE = None
|
_SCHEMA_CACHE = None
|
||||||
|
|
||||||
|
|
||||||
@@ -56,6 +58,9 @@ async def db():
|
|||||||
|
|
||||||
original_db = core._db
|
original_db = core._db
|
||||||
core._db = conn
|
core._db = conn
|
||||||
|
# Clear sitemap cache so tests see fresh DB state
|
||||||
|
sitemap_mod._cache_xml = ""
|
||||||
|
sitemap_mod._cache_timestamp = 0.0
|
||||||
|
|
||||||
yield conn
|
yield conn
|
||||||
|
|
||||||
@@ -147,6 +152,7 @@ def create_subscription(db):
|
|||||||
async def scenario(db, test_user):
|
async def scenario(db, test_user):
|
||||||
"""User scenario with valid planner state for PDF generation."""
|
"""User scenario with valid planner state for PDF generation."""
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from padelnomics.planner.calculator import validate_state
|
from padelnomics.planner.calculator import validate_state
|
||||||
state = validate_state({"dblCourts": 4, "sglCourts": 2})
|
state = validate_state({"dblCourts": 4, "sglCourts": 2})
|
||||||
now = datetime.utcnow().isoformat()
|
now = datetime.utcnow().isoformat()
|
||||||
|
|||||||
Reference in New Issue
Block a user