Files
padelnomics/infra/supervisor/supervisor.sh
Deeman 2db66efe77 feat: migrate transform to 3-layer architecture with per-layer schemas
Remove raw/ layer — staging models now read landing JSON directly.
Rename all model schemas from padelnomics.* to staging.*/foundation.*/serving.*.
Web app queries updated to serving.planner_defaults via SERVING_DUCKDB_PATH.
Supervisor gets daily sleep interval between pipeline runs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 19:04:40 +01:00

58 lines
2.1 KiB
Bash

#!/bin/sh
# Padelnomics Supervisor — continuous pipeline orchestration.
# Inspired by TigerBeetle's CFO supervisor: simple, resilient, easy to understand.
# https://github.com/tigerbeetle/tigerbeetle/blob/main/src/scripts/cfo_supervisor.sh
#
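# Environment variables (set in systemd EnvironmentFile or .env); all are optional:
#   LANDING_DIR         — local path for extracted landing data
#                         (default: /data/padelnomics/landing)
#   DUCKDB_PATH         — path to DuckDB lakehouse (pipeline DB, SQLMesh exclusive)
#                         (default: /data/padelnomics/lakehouse.duckdb)
#   SERVING_DUCKDB_PATH — path to serving-only DuckDB (web app reads from here)
#                         (default: /data/padelnomics/analytics.duckdb)
#   ALERT_WEBHOOK_URL   — ntfy.sh / Slack / Telegram webhook for failure alerts
#                         (no alert is sent when unset or empty)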
set -eu

readonly REPO_DIR="/opt/padelnomics"
readonly RUN_INTERVAL_SECONDS=86400     # run once per day
readonly FAILURE_BACKOFF_SECONDS=600    # back off 10 min on failure
readonly ALERT_TIMEOUT_SECONDS=30       # upper bound for the alert webhook call

#######################################
# Run one full pipeline pass: update the checkout, extract, transform, export.
#
# Every step is followed by an explicit `|| return`. The caller uses this
# function as the condition of an `if`, and in that context the shell ignores
# `set -e` for everything executed inside (subshells included). Relying on
# errexit alone would let a failed step fall through to the next one and
# report only the status of the last command.
#
# Globals:  REPO_DIR (read); LANDING_DIR, DUCKDB_PATH, SERVING_DUCKDB_PATH
#           (read; each is optional and falls back to a default below)
# Returns:  0 if every step succeeded, otherwise the status of the first
#           failing step (1 if the repository checkout is missing).
#######################################
run_pipeline() {
  if ! [ -d "$REPO_DIR/.git" ]; then
    echo "Repository not found at $REPO_DIR — bootstrap required!" >&2
    return 1
  fi
  cd "$REPO_DIR" || return

  # Resolve the defaults once so all stages agree on the same paths.
  landing_dir="${LANDING_DIR:-/data/padelnomics/landing}"
  duckdb_path="${DUCKDB_PATH:-/data/padelnomics/lakehouse.duckdb}"
  serving_duckdb_path="${SERVING_DUCKDB_PATH:-/data/padelnomics/analytics.duckdb}"

  # Pull latest code
  git fetch origin master || return
  git switch --discard-changes --detach origin/master || return
  uv sync || return

  # Extract
  LANDING_DIR="$landing_dir" \
  DUCKDB_PATH="$duckdb_path" \
    uv run --package padelnomics_extract extract || return

  # Transform
  LANDING_DIR="$landing_dir" \
  DUCKDB_PATH="$duckdb_path" \
    uv run --package sqlmesh_padelnomics sqlmesh run --select-model "serving.*" || return

  # Export serving tables to analytics.duckdb (atomic swap).
  # The web app detects the inode change on next query — no restart needed.
  DUCKDB_PATH="$duckdb_path" \
  SERVING_DUCKDB_PATH="$serving_duckdb_path" \
    uv run python -m padelnomics.export_serving || return
}

#######################################
# Post a failure message to ALERT_WEBHOOK_URL, if one is configured.
#
# Alerting is deliberately best-effort: curl errors are discarded and never
# abort the supervisor. --max-time bounds the call so an unresponsive webhook
# cannot stall the loop.
#
# Globals:  ALERT_WEBHOOK_URL (read, optional), ALERT_TIMEOUT_SECONDS (read)
# Returns:  always 0
#######################################
notify_failure() {
  if [ -n "${ALERT_WEBHOOK_URL:-}" ]; then
    curl -s --max-time "$ALERT_TIMEOUT_SECONDS" \
      -d "Padelnomics pipeline failed at $(date)" \
      "$ALERT_WEBHOOK_URL" 2>/dev/null || true
  fi
}

while true; do
  # The subshell keeps `cd` and the helper variables from leaking between runs.
  if ( run_pipeline ); then
    sleep "$RUN_INTERVAL_SECONDS"
  else
    notify_failure
    sleep "$FAILURE_BACKOFF_SECONDS"
  fi
done