#!/bin/sh
# Materia Supervisor - Continuous pipeline orchestration
# Inspired by TigerBeetle's CFO supervisor: simple, resilient, easy to understand
# https://github.com/tigerbeetle/tigerbeetle/blob/main/src/scripts/cfo_supervisor.sh
#
# Each iteration of the main loop:
#   1. verifies the repository checkout exists,
#   2. updates it to origin/master and syncs dependencies with uv,
#   3. runs every extract stage, then the transform stage.
# If any step fails, the remaining steps of that iteration are skipped, an
# optional webhook alert is sent, and the supervisor sleeps 10 minutes before
# retrying. A successful iteration starts the next one immediately.
#
# Environment variables (set in systemd EnvironmentFile):
#   LANDING_DIR       — local path for extracted landing data
#   DUCKDB_PATH       — path to DuckDB lakehouse file
#   ALERT_WEBHOOK_URL — optional ntfy.sh / Slack / Telegram webhook for failure alerts

set -eu

readonly REPO_DIR="/opt/materia"

# Run a single pipeline stage.
# Arguments: $1 - stage name passed to `materia pipeline run`
# Returns:   exit status of the `uv run` invocation
# LANDING_DIR and DUCKDB_PATH fall back to their defaults only for the
# duration of this command; the supervisor's own environment is not modified.
run_stage() {
    LANDING_DIR="${LANDING_DIR:-/data/materia/landing}" \
    DUCKDB_PATH="${DUCKDB_PATH:-/data/materia/lakehouse.duckdb}" \
        uv run materia pipeline run "$1"
}

while true
do
    # The shell ignores `set -e` for every command executed on the left-hand
    # side of `||` or `&&`, including everything inside a subshell used there.
    # Writing `( ... ) || handler` would therefore let a failed fetch/sync/
    # extract fall through to the next step. Instead, run the subshell outside
    # any AND-OR list with errexit enabled inside it, and inspect its exit
    # status afterwards. errexit is switched off in the parent only around the
    # subshell so a non-zero status does not terminate the supervisor itself.
    set +e
    (
        set -e

        # This script never clones; it refuses to run until the checkout has
        # been bootstrapped by other means.
        if ! [ -d "$REPO_DIR/.git" ]
        then
            echo "Repository not found, bootstrap required!" >&2
            exit 1
        fi

        cd "$REPO_DIR"

        # Update code from git, discarding any local modifications
        git fetch origin master
        git switch --discard-changes --detach origin/master
        uv sync

        # Extract all data sources, then transform them. Order matters:
        # transform must only run after every extract stage succeeded.
        for stage in extract extract_cot extract_prices extract_ice transform
        do
            run_stage "$stage"
        done
    )
    status=$?
    set -e

    if [ "$status" -ne 0 ]
    then
        # Notify on failure if webhook is configured, then sleep to avoid busy-loop
        if [ -n "${ALERT_WEBHOOK_URL:-}" ]
        then
            # Best-effort alert: errors are deliberately swallowed, and
            # --max-time keeps an unresponsive webhook from stalling the loop.
            curl -s --max-time 30 -d "Materia pipeline failed at $(date)" "$ALERT_WEBHOOK_URL" 2>/dev/null || true
        fi
        sleep 600 # Sleep 10 min on failure
    fi
done