feat: migrate transform to 3-layer architecture with per-layer schemas

Remove raw/ layer — staging models now read landing JSON directly.
Rename all model schemas from padelnomics.* to staging.*/foundation.*/serving.*.
Web app queries now read serving.planner_defaults from the DuckDB file set by
SERVING_DUCKDB_PATH (replaces DUCKDB_PATH; default is now data/analytics.duckdb).
Supervisor gets a daily sleep interval between pipeline runs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-22 19:04:40 +01:00
parent 53e9bbd66b
commit 2db66efe77
19 changed files with 306 additions and 301 deletions

View File

@@ -7,7 +7,7 @@ All queries run via asyncio.to_thread() to avoid blocking the event loop.
Usage:
from .analytics import fetch_analytics
rows = await fetch_analytics("SELECT * FROM padelnomics.planner_defaults WHERE city_slug = ?", ["berlin"])
rows = await fetch_analytics("SELECT * FROM serving.planner_defaults WHERE city_slug = ?", ["berlin"])
"""
import asyncio
import os
@@ -17,7 +17,7 @@ from typing import Any
import duckdb
_conn: duckdb.DuckDBPyConnection | None = None
_DUCKDB_PATH = os.environ.get("DUCKDB_PATH", "data/lakehouse.duckdb")
_DUCKDB_PATH = os.environ.get("SERVING_DUCKDB_PATH", "data/analytics.duckdb")
def open_analytics_db() -> None:

View File

@@ -603,7 +603,7 @@ async def market_data():
from ..analytics import fetch_analytics
rows = await fetch_analytics(
"SELECT * FROM padelnomics.planner_defaults WHERE city_slug = ? LIMIT 1",
"SELECT * FROM serving.planner_defaults WHERE city_slug = ? LIMIT 1",
[city_slug],
)
if not rows:

View File

@@ -1,7 +1,7 @@
"""
Refresh template_data rows from DuckDB analytics serving layer.
Reads per-city market data from the `padelnomics.planner_defaults` serving table
Reads per-city market data from the `serving.planner_defaults` serving table
and overwrites matching static values in `template_data.data_json`. This keeps
article financial model inputs in sync with the real-world data pipeline output.
@@ -81,7 +81,7 @@ def _load_analytics(city_slugs: list[str]) -> dict[str, dict]:
conn = duckdb.connect(str(path), read_only=True)
placeholders = ", ".join(["?"] * len(city_slugs))
rows = conn.execute(
f"SELECT * FROM padelnomics.planner_defaults WHERE city_slug IN ({placeholders})",
f"SELECT * FROM serving.planner_defaults WHERE city_slug IN ({placeholders})",
city_slugs,
).fetchall()
cols = [d[0] for d in conn.description]