diff --git a/CHANGELOG.md b/CHANGELOG.md
index bceecd3..1e98b87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- Supervisor: added daily sleep interval between pipeline runs
### Added
+- **Sitemap: hreflang alternates + caching** — extracted sitemap generation to
+ `sitemap.py`; each URL entry now includes `xhtml:link` hreflang alternates
+ (en, de, x-default) for correct international SEO signaling; supplier detail
+ pages now listed in both EN and DE (were EN-only); removed misleading "today"
+ lastmod from static pages; added 1-hour in-memory TTL cache with
+ `Cache-Control: public, max-age=3600` response header
- **Playtomic availability extractor** (`playtomic_availability.py`) — daily next-day booking
slot snapshots for occupancy rate estimation and pricing benchmarking; reads tenant IDs from
latest `tenants.json.gz`, queries `/v1/availability` per venue with 2s throttle, resumable
diff --git a/web/src/padelnomics/app.py b/web/src/padelnomics/app.py
index e54a817..30aad72 100644
--- a/web/src/padelnomics/app.py
+++ b/web/src/padelnomics/app.py
@@ -251,63 +251,10 @@ def create_app() -> Quart:
)
return Response(body, content_type="text/plain")
- # sitemap.xml must live at root
@app.route("/sitemap.xml")
async def sitemap():
- from datetime import UTC, datetime
-
- from .core import fetch_all
- base = config.BASE_URL.rstrip("/")
- today = datetime.now(UTC).strftime("%Y-%m-%d")
-
- # Both language variants of all SEO pages
- static_paths = [
- "", # landing
- "/features",
- "/about",
- "/terms",
- "/privacy",
- "/imprint",
- "/suppliers",
- "/markets",
- ]
- entries: list[tuple[str, str]] = []
- for path in static_paths:
- for lang in ("en", "de"):
- entries.append((f"{base}/{lang}{path}", today))
-
- # Planner + directory lang variants, billing (no lang)
- for lang in ("en", "de"):
- entries.append((f"{base}/{lang}/planner/", today))
- entries.append((f"{base}/{lang}/directory/", today))
- entries.append((f"{base}/billing/pricing", today))
-
- # Published articles — both lang variants
- articles = await fetch_all(
- """SELECT url_path, COALESCE(updated_at, published_at) as lastmod
- FROM articles
- WHERE status = 'published' AND published_at <= datetime('now')
- ORDER BY published_at DESC"""
- )
- for article in articles:
- lastmod = article["lastmod"][:10] if article["lastmod"] else today
- for lang in ("en", "de"):
- entries.append((f"{base}/{lang}{article['url_path']}", lastmod))
-
- # Supplier detail pages (English only — canonical)
- suppliers = await fetch_all(
- "SELECT slug, created_at FROM suppliers ORDER BY name LIMIT 5000"
- )
- for supplier in suppliers:
- lastmod = supplier["created_at"][:10] if supplier["created_at"] else today
- entries.append((f"{base}/en/directory/{supplier['slug']}", lastmod))
-
- xml = '\n'
- xml += '\n'
- for loc, lastmod in entries:
- xml += f" {loc}{lastmod}\n"
- xml += ""
- return Response(xml, content_type="application/xml")
+ # Sitemap generation and its in-memory cache live in the sitemap module;
+ # this route only delegates and passes the configured base URL through.
+ from .sitemap import sitemap_response
+ return await sitemap_response(config.BASE_URL)
# Health check
@app.route("/health")
diff --git a/web/src/padelnomics/sitemap.py b/web/src/padelnomics/sitemap.py
new file mode 100644
index 0000000..d7c1568
--- /dev/null
+++ b/web/src/padelnomics/sitemap.py
@@ -0,0 +1,117 @@
+"""Sitemap generation with in-memory TTL cache and hreflang alternates."""
+
+import time
+from xml.sax.saxutils import escape
+
+from quart import Response
+
+from .core import fetch_all
+
+# Module-level cache: the most recently generated sitemap XML and the
+# time.monotonic() reading taken for that generation. An empty string means
+# nothing is cached yet and forces a rebuild on the next request.
+_cache_xml: str = ""
+_cache_timestamp: float = 0.0
+# Used both as the cache lifetime and as the Cache-Control max-age value.
+CACHE_TTL_SECONDS: int = 3600 # 1 hour
+
+# Language prefixes every localized URL is emitted under.
+LANGS = ("en", "de")
+# Language variant the "x-default" hreflang alternate points at.
+DEFAULT_LANG = "en"
+
+# Pages with lang prefix but no meaningful lastmod
+STATIC_PATHS = [
+ "", # landing
+ "/features",
+ "/about",
+ "/terms",
+ "/privacy",
+ "/imprint",
+ "/suppliers",
+ "/markets",
+ "/planner/",
+ "/directory/",
+]
+
+
+def _url_entry(loc: str, alternates: list[tuple[str, str]], lastmod: str | None = None) -> str:
+    """Build a single <url> sitemap entry.
+
+    Args:
+        loc: Absolute URL of the page, emitted as the <loc> element.
+        alternates: ``(hreflang, href)`` pairs, each emitted as an
+            ``<xhtml:link rel="alternate">`` child. May be empty.
+        lastmod: Last-modification date string. The <lastmod> element is
+            omitted when this is ``None`` or empty.
+
+    Returns:
+        The <url> element as newline-joined, indented text without a
+        trailing newline.
+    """
+    # Every interpolated value is entity-escaped: URLs may legitimately
+    # contain "&", and paths/slugs originate from the database.
+    attr_entities = {'"': "&quot;"}
+    parts = [f"  <url>\n    <loc>{escape(loc)}</loc>"]
+    for hreflang, href in alternates:
+        parts.append(
+            f'    <xhtml:link rel="alternate" hreflang="{escape(hreflang, attr_entities)}"'
+            f' href="{escape(href, attr_entities)}"/>'
+        )
+    if lastmod:
+        parts.append(f"    <lastmod>{escape(lastmod)}</lastmod>")
+    parts.append("  </url>")
+    return "\n".join(parts)
+
+
+def _lang_alternates(base: str, path: str) -> list[tuple[str, str]]:
+    """Return the ``(hreflang, href)`` pairs for a lang-prefixed path.
+
+    One pair is produced per entry in ``LANGS``, followed by an ``x-default``
+    pair that points at the ``DEFAULT_LANG`` variant of the same path.
+    """
+    per_language = [(code, f"{base}/{code}{path}") for code in LANGS]
+    fallback = ("x-default", f"{base}/{DEFAULT_LANG}{path}")
+    return [*per_language, fallback]
+
+
+async def _generate_sitemap_xml(base_url: str) -> str:
+    """Build the sitemap XML document from static paths and DB content.
+
+    Args:
+        base_url: Site origin; a trailing slash is stripped before use.
+
+    Returns:
+        A complete document: XML declaration, a <urlset> root declaring the
+        sitemap and ``xhtml`` namespaces, one <url> entry per language
+        variant of every page, and the closing tag.
+
+    NOTE(review): the sitemap protocol caps one file at 50,000 URLs. 25,000
+    articles x 2 languages already reaches that before suppliers and static
+    pages are added; switch to a sitemap index if the tables can grow that
+    large.
+    """
+    base = base_url.rstrip("/")
+    entries: list[str] = []
+
+    def add_lang_variants(path: str, lastmod: str | None = None) -> None:
+        """Append one entry per language for *path*, sharing one alternates list."""
+        alternates = _lang_alternates(base, path)
+        entries.extend(
+            _url_entry(f"{base}/{lang}{path}", alternates, lastmod) for lang in LANGS
+        )
+
+    # Static pages — both lang variants, no lastmod (rarely changes)
+    for path in STATIC_PATHS:
+        add_lang_variants(path)
+
+    # Billing pricing — no lang prefix, no hreflang
+    entries.append(_url_entry(f"{base}/billing/pricing", []))
+
+    # Published articles — both lang variants with accurate lastmod
+    articles = await fetch_all(
+        """SELECT url_path, COALESCE(updated_at, published_at) AS lastmod
+           FROM articles
+           WHERE status = 'published' AND published_at <= datetime('now')
+           ORDER BY published_at DESC
+           LIMIT 25000"""
+    )
+    for article in articles:
+        # Keep the first 10 characters of the stored timestamp (presumably
+        # the YYYY-MM-DD date part); omit lastmod when the column is empty.
+        lastmod = article["lastmod"][:10] if article["lastmod"] else None
+        add_lang_variants(article["url_path"], lastmod)
+
+    # Supplier detail pages — both lang variants
+    suppliers = await fetch_all(
+        "SELECT slug, created_at FROM suppliers ORDER BY name LIMIT 5000"
+    )
+    for supplier in suppliers:
+        lastmod = supplier["created_at"][:10] if supplier["created_at"] else None
+        add_lang_variants(f"/directory/{supplier['slug']}", lastmod)
+
+    # The xhtml namespace must be declared on the root element for the
+    # xhtml:link hreflang alternates inside each entry to be valid.
+    header = (
+        '<?xml version="1.0" encoding="UTF-8"?>\n'
+        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
+        ' xmlns:xhtml="http://www.w3.org/1999/xhtml">\n'
+    )
+    return header + "\n".join(entries) + "\n</urlset>"
+
+
+async def sitemap_response(base_url: str) -> Response:
+    """Serve the sitemap, rebuilding the cached XML once it outlives the TTL.
+
+    The XML is held in module-level globals and regenerated when nothing is
+    cached yet or the cached copy is more than ``CACHE_TTL_SECONDS`` old. The
+    response advertises the same lifetime through ``Cache-Control``.
+    """
+    global _cache_xml, _cache_timestamp  # noqa: PLW0603
+    started = time.monotonic()
+    cache_age = started - _cache_timestamp
+    is_fresh = bool(_cache_xml) and cache_age <= CACHE_TTL_SECONDS
+    if not is_fresh:
+        _cache_xml = await _generate_sitemap_xml(base_url)
+        # Stamp with the reading taken before generation started.
+        _cache_timestamp = started
+    cache_headers = {"Cache-Control": f"public, max-age={CACHE_TTL_SECONDS}"}
+    return Response(_cache_xml, content_type="application/xml", headers=cache_headers)
diff --git a/web/tests/conftest.py b/web/tests/conftest.py
index ecc5b80..207a0db 100644
--- a/web/tests/conftest.py
+++ b/web/tests/conftest.py
@@ -16,6 +16,7 @@ from padelnomics.app import create_app
from padelnomics.migrations.migrate import migrate
from padelnomics import core
+from padelnomics import sitemap as sitemap_mod
_SCHEMA_CACHE = None
@@ -57,6 +58,9 @@ async def db():
original_db = core._db
core._db = conn
+ # Clear sitemap cache so tests see fresh DB state
+ sitemap_mod._cache_xml = ""
+ sitemap_mod._cache_timestamp = 0.0
yield conn