#!/bin/sh
# Padelnomics Supervisor — continuous pipeline orchestration.
# Inspired by TigerBeetle's CFO supervisor: simple, resilient, easy to understand.
# https://github.com/tigerbeetle/tigerbeetle/blob/main/src/scripts/cfo_supervisor.sh
#
# Environment variables (set in systemd EnvironmentFile or .env):
#   LANDING_DIR         — local path for extracted landing data
#   DUCKDB_PATH         — path to DuckDB lakehouse (pipeline DB, SQLMesh exclusive)
#   SERVING_DUCKDB_PATH — path to serving-only DuckDB (web app reads from here)
#   ALERT_WEBHOOK_URL   — optional ntfy.sh / Slack / Telegram webhook for failures
#   RUN_INTERVAL        — seconds to pause between successful runs (default 60)

set -eu

readonly REPO_DIR="/opt/padelnomics"

# Resolve the defaults once instead of repeating "${VAR:-default}" before
# every command.  'export' makes them visible to every child process below,
# which is equivalent to the per-command env-prefix form.
export LANDING_DIR="${LANDING_DIR:-/data/padelnomics/landing}"
export DUCKDB_PATH="${DUCKDB_PATH:-/data/padelnomics/lakehouse.duckdb}"
export SERVING_DUCKDB_PATH="${SERVING_DUCKDB_PATH:-/data/padelnomics/analytics.duckdb}"

while true
do
  # POSIX gotcha: 'set -e' is IGNORED for any command on the left of '||'
  # (and inside 'if' conditions), so the common '( ... ) || handler' shape
  # silently keeps running the subshell's remaining steps after one fails.
  # Instead, run the subshell unconditionally — there its own 'set -e' is
  # honored — and capture the exit status with set +e / set -e around it.
  set +e
  (
    set -e

    if ! [ -d "$REPO_DIR/.git" ]; then
      # Diagnostics go to stderr so journald/log scrapers classify them.
      echo "Repository not found at $REPO_DIR — bootstrap required!" >&2
      exit 1
    fi
    cd "$REPO_DIR"

    # Pull latest code and sync the Python environment to the lockfile.
    git fetch origin master
    git switch --discard-changes --detach origin/master
    uv sync

    # Extract: land raw data into LANDING_DIR / the lakehouse.
    uv run --package padelnomics_extract extract

    # Transform: materialize the serving layer with SQLMesh.
    uv run --package sqlmesh_padelnomics sqlmesh run --select-model "serving.*"

    # Export serving tables to analytics.duckdb (atomic swap).
    # The web app detects the inode change on next query — no restart needed.
    uv run python -m padelnomics.export_serving
  )
  status=$?
  set -e

  if [ "$status" -ne 0 ]; then
    if [ -n "${ALERT_WEBHOOK_URL:-}" ]; then
      # Best-effort alert: '|| true' keeps a dead webhook from killing the
      # supervisor, and --max-time keeps a hung webhook from blocking it.
      curl -s --max-time 10 -d "Padelnomics pipeline failed at $(date)" \
        "$ALERT_WEBHOOK_URL" 2>/dev/null || true
    fi
    sleep 600  # back off 10 min on failure
  else
    # Brief pause between successful runs so a fast pipeline does not
    # hammer the git remote in a tight loop.
    sleep "${RUN_INTERVAL:-60}"
  fi
done