diff --git a/web/src/padelnomics/admin/pipeline_routes.py b/web/src/padelnomics/admin/pipeline_routes.py
index 79b632f..cea4070 100644
--- a/web/src/padelnomics/admin/pipeline_routes.py
+++ b/web/src/padelnomics/admin/pipeline_routes.py
@@ -66,6 +66,226 @@ _BLOCKED_SQL_RE = re.compile(
re.IGNORECASE,
)
+# ── Lineage DAG ───────────────────────────────────────────────────────────────
+#
+# Canonical model dependency map: model_name → [upstream_dependencies].
+# Layer is derived from name prefix: stg_* = staging, dim_*/fct_* = foundation,
+# everything else = serving.
+# Update this dict whenever models are added or removed from transform/.
+_DAG: dict[str, list[str]] = {
+ # Staging (stg_*) — read landing zone files, so every entry has no model deps
+ "stg_padel_courts": [],
+ "stg_playtomic_venues": [],
+ "stg_playtomic_resources": [],
+ "stg_playtomic_opening_hours": [],
+ "stg_playtomic_availability": [],
+ "stg_population": [],
+ "stg_population_usa": [],
+ "stg_population_uk": [],
+ "stg_population_geonames": [],
+ "stg_income": [],
+ "stg_income_usa": [],
+ "stg_city_labels": [],
+ "stg_nuts2_boundaries": [],
+ "stg_regional_income": [],
+ "stg_tennis_courts": [],
+ # Foundation (dim_*/fct_*) — built from staging and other foundation models
+ "dim_venues": ["stg_playtomic_venues", "stg_playtomic_resources", "stg_padel_courts"],
+ "dim_cities": [
+ "dim_venues", "stg_income", "stg_city_labels",
+ "stg_population", "stg_population_usa", "stg_population_uk", "stg_population_geonames",
+ ],
+ "dim_locations": [
+ "stg_population_geonames", "stg_income", "stg_nuts2_boundaries",
+ "stg_regional_income", "stg_income_usa", "stg_padel_courts", "stg_tennis_courts",
+ ],
+ "dim_venue_capacity": [
+ "stg_playtomic_venues", "stg_playtomic_resources", "stg_playtomic_opening_hours",
+ ],
+ "fct_availability_slot": ["stg_playtomic_availability"],
+ "fct_daily_availability": ["fct_availability_slot", "dim_venue_capacity"],
+ # Serving (any other name) — built from foundation and other serving models
+ "venue_pricing_benchmarks": ["fct_daily_availability"],
+ "city_market_profile": ["dim_cities", "venue_pricing_benchmarks"],
+ "planner_defaults": ["venue_pricing_benchmarks", "city_market_profile"],
+ "location_opportunity_profile": ["dim_locations"],
+ "pseo_city_costs_de": [
+ "city_market_profile", "planner_defaults", "location_opportunity_profile",
+ ],
+ "pseo_city_pricing": ["venue_pricing_benchmarks", "city_market_profile"],
+ "pseo_country_overview": ["pseo_city_costs_de"],
+}
+
+
+def _classify_layer(name: str) -> str:
+    """Classify a model by name prefix as 'staging', 'foundation', or 'serving'."""
+    prefix_layers = (("stg_", "staging"), (("dim_", "fct_"), "foundation"))
+    for prefixes, layer in prefix_layers:
+        if name.startswith(prefixes):
+            return layer
+    return "serving"
+
+
+def _render_lineage_svg(dag: dict[str, list[str]]) -> str:
+ """Render the 3-layer model dependency DAG as an SVG string.
+
+ Layout: three vertical swim lanes (staging / foundation / serving) with
+ nodes stacked top-to-bottom in each lane. Edges are cubic bezier paths
+ flowing left-to-right. No external dependencies — pure Python string
+ construction.
+ """
+ # ── Layout constants ───────────────────────────────────────────────────
+ CHAR_WIDTH_PX = 7.4 # approximate monospace char width at 11px
+ NODE_PAD_H = 10 # horizontal padding inside node rect
+ NODE_H = 26 # node height
+ NODE_VGAP = 10 # vertical gap between nodes in same lane
+ LANE_PAD_TOP = 52 # space for lane header
+ LANE_PAD_BOTTOM = 24
+ LANE_INNER_W = 210 # inner usable width per lane
+ LANE_GAP = 40 # gap between lanes
+ LANE_PAD_LEFT = 16 # left padding inside lane bg
+
+ LANE_COLORS = {
+ "staging": {"bg": "#F0FDF4", "border": "#BBF7D0", "accent": "#16A34A",
+ "fill": "#DCFCE7", "text": "#14532D"},
+ "foundation": {"bg": "#EFF6FF", "border": "#BFDBFE", "accent": "#1D4ED8",
+ "fill": "#DBEAFE", "text": "#1E3A8A"},
+ "serving": {"bg": "#FFFBEB", "border": "#FDE68A", "accent": "#D97706",
+ "fill": "#FEF3C7", "text": "#78350F"},
+ }
+ LANE_ORDER = ["staging", "foundation", "serving"]
+ LANE_LABELS = {"staging": "STAGING", "foundation": "FOUNDATION", "serving": "SERVING"}
+
+ # ── Group and sort nodes per layer ─────────────────────────────────────
+ # Count how many nodes each node is depended upon by (downstream count)
+ downstream: dict[str, int] = {n: 0 for n in dag}
+ for deps in dag.values():
+ for d in deps:
+ downstream[d] = downstream.get(d, 0) + 1
+
+ layers: dict[str, list[str]] = {"staging": [], "foundation": [], "serving": []}
+ for name in dag:
+ layers[_classify_layer(name)].append(name)
+ for layer_name, nodes in layers.items():
+ # Sort: most-connected first (hub nodes near vertical center), then alpha
+ nodes.sort(key=lambda n: (-downstream.get(n, 0), n))
+
+ # ── Compute node widths ────────────────────────────────────────────────
+ def node_w(name: str) -> float:
+ return max(len(name) * CHAR_WIDTH_PX + NODE_PAD_H * 2, 80.0)
+
+ # ── Assign positions ───────────────────────────────────────────────────
+ # x = left edge of lane background; node rect starts at x + LANE_PAD_LEFT
+ lane_x: dict[str, float] = {}
+ x_cursor = 0.0
+ for lane in LANE_ORDER:
+ lane_x[lane] = x_cursor
+ x_cursor += LANE_INNER_W + LANE_PAD_LEFT * 2 + LANE_GAP
+
+ positions: dict[str, tuple[float, float]] = {} # node → (rect_x, rect_y)
+ lane_heights: dict[str, float] = {}
+ for lane in LANE_ORDER:
+ nodes = layers[lane]
+ y = LANE_PAD_TOP
+ for name in nodes:
+ rx = lane_x[lane] + LANE_PAD_LEFT
+ positions[name] = (rx, y)
+ y += NODE_H + NODE_VGAP
+ lane_heights[lane] = y + LANE_PAD_BOTTOM - NODE_VGAP
+
+ total_w = x_cursor - LANE_GAP
+ total_h = max(lane_heights.values())
+
+ # ── SVG assembly ───────────────────────────────────────────────────────
+ parts: list[str] = []
+
+ # Arrowhead marker
+ parts.append(
+ '
Data Lineage + + {{ node_count }} models — staging → foundation → serving + +
+