#!/bin/bash
# Materia Supervisor - Continuous pipeline orchestration
# Inspired by TigerBeetle's CFO supervisor pattern
# https://github.com/tigerbeetle/tigerbeetle/blob/main/src/scripts/cfo_supervisor.sh
#
# Required environment:
#   R2_ENDPOINT          host of the artifact store
#   R2_ARTIFACTS_BUCKET  bucket/path holding the CLI artifact
#
# Behaviour: loops forever; every CHECK_INTERVAL seconds it (at most hourly)
# refreshes the materia CLI from the artifact store, and runs the extract and
# transform pipelines once per UTC day during their scheduled hour.

set -euo pipefail

# Fail early with a readable message instead of a bare "unbound variable".
: "${R2_ENDPOINT:?R2_ENDPOINT must be set}"
: "${R2_ARTIFACTS_BUCKET:?R2_ARTIFACTS_BUCKET must be set}"

# Configuration
readonly CHECK_INTERVAL=900               # 15 minutes
readonly CLI_VERSION_CHECK_INTERVAL=3600  # 1 hour
readonly MATERIA_DIR="/opt/materia"
readonly R2_ARTIFACTS_URL="https://${R2_ENDPOINT}/${R2_ARTIFACTS_BUCKET}"
readonly CLI_ARTIFACT="materia-cli-latest.tar.gz"

# Schedules (hour of day, UTC)
readonly EXTRACT_SCHEDULE_HOUR=2    # 02:00 UTC
readonly TRANSFORM_SCHEDULE_HOUR=3  # 03:00 UTC

# State tracking
last_extract_run=""    # UTC date (YYYY-MM-DD) of the last successful extract
last_transform_run=""  # UTC date (YYYY-MM-DD) of the last successful transform
last_cli_check=0       # epoch seconds of the last update attempt

# Write a timestamped message to stdout.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}

# Write a timestamped error message to stderr.
log_error() {
  printf '[%s] ERROR: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}

#######################################
# Download the CLI artifact and reinstall the CLI if its checksum changed.
# Rate-limited to one attempt per CLI_VERSION_CHECK_INTERVAL seconds.
#
# Callers invoke this on the left-hand side of `||`, which disables `set -e`
# inside the function, so every step that matters is checked explicitly.
#
# Globals:  MATERIA_DIR, R2_ARTIFACTS_URL, CLI_ARTIFACT,
#           CLI_VERSION_CHECK_INTERVAL (read); last_cli_check (written)
# Returns:  0 if up to date, skipped, or updated; non-zero on failure
#######################################
check_cli_update() {
  local now
  now=$(date +%s)

  # Only check once per interval
  if (( now - last_cli_check < CLI_VERSION_CHECK_INTERVAL )); then
    return 0
  fi
  last_cli_check=$now

  log "Checking for CLI updates..."

  local temp_file="${MATERIA_DIR}/cli-new.tar.gz"
  local installed_artifact="${MATERIA_DIR}/${CLI_ARTIFACT}"
  local cli_dir="${MATERIA_DIR}/cli"

  # Download new version; drop any partial file on failure.
  if ! curl -fsSL -o "$temp_file" "${R2_ARTIFACTS_URL}/${CLI_ARTIFACT}"; then
    log_error "Failed to download CLI artifact"
    rm -f -- "$temp_file"
    return 1
  fi

  # Compare checksums
  local old_checksum=""
  local new_checksum=""
  if [[ -f "$installed_artifact" ]]; then
    # A failure here leaves old_checksum empty, which simply forces a reinstall.
    old_checksum=$(sha256sum "$installed_artifact" | awk '{print $1}') || old_checksum=""
  fi
  if ! new_checksum=$(sha256sum "$temp_file" | awk '{print $1}') \
    || [[ -z "$new_checksum" ]]; then
    log_error "Failed to checksum downloaded CLI artifact"
    rm -f -- "$temp_file"
    return 1
  fi

  if [[ "$old_checksum" == "$new_checksum" ]]; then
    log "CLI is up to date"
    rm -f -- "$temp_file"
    return 0
  fi

  log "New CLI version detected, updating..."

  # Unpack using absolute paths: no `cd`, so a missing directory can never
  # redirect the `rm -rf` elsewhere and the supervisor's cwd is untouched.
  # NOTE(review): if the pipelines rely on cwd being MATERIA_DIR, set it in
  # the service definition rather than as a side effect of an update.
  if ! { rm -rf -- "$cli_dir" \
    && mkdir -p -- "$cli_dir" \
    && tar -xzf "$temp_file" -C "$cli_dir"; }; then
    log_error "Failed to unpack CLI artifact"
    rm -f -- "$temp_file"
    return 1
  fi

  local wheels=("$cli_dir"/*.whl)
  if [[ ! -e "${wheels[0]}" ]]; then
    log_error "CLI artifact contains no wheel"
    rm -f -- "$temp_file"
    return 1
  fi

  if ! pip3 install --force-reinstall "${wheels[@]}"; then
    log_error "Failed to install CLI"
    # The stored artifact is left as-is so the next check sees a checksum
    # mismatch and retries the install.
    rm -f -- "$temp_file"
    return 1
  fi

  # Record the artifact only after a successful install.
  if ! mv -f -- "$temp_file" "$installed_artifact"; then
    log_error "Failed to store CLI artifact"
    return 1
  fi

  log "CLI updated successfully"
  materia version
}

#######################################
# Decide whether a daily job is due right now.
# Arguments: $1 - scheduled hour (0-23, UTC)
#            $2 - UTC date (YYYY-MM-DD) of the job's last successful run, or ""
# Returns:   0 if the current UTC hour equals $1 and the job has not yet
#            succeeded today; 1 otherwise
#######################################
_is_due() {
  local schedule_hour=$1
  local last_run=$2
  local current_hour
  local current_date
  current_hour=$(date -u +%H)
  current_date=$(date -u +%Y-%m-%d)

  # `date +%H` is zero-padded ("02"); compare numerically and force base 10
  # so "02" matches 2 and "08"/"09" are not parsed as invalid octal.
  if (( 10#$current_hour != 10#$schedule_hour )); then
    return 1
  fi

  # Only run once per day
  if [[ "$last_run" == "$current_date" ]]; then
    return 1
  fi

  return 0
}

# Check if we should run extract pipeline (daily at EXTRACT_SCHEDULE_HOUR UTC)
should_run_extract() {
  _is_due "$EXTRACT_SCHEDULE_HOUR" "$last_extract_run"
}

# Check if we should run transform pipeline (daily at TRANSFORM_SCHEDULE_HOUR UTC)
should_run_transform() {
  _is_due "$TRANSFORM_SCHEDULE_HOUR" "$last_transform_run"
}

# Run extract pipeline. On success records today's UTC date in
# last_extract_run; on failure returns 1 so the next loop iteration within
# the scheduled hour retries.
run_extract() {
  log "Starting extract pipeline..."

  if materia pipeline run extract; then
    log "Extract pipeline completed successfully"
    last_extract_run=$(date -u +%Y-%m-%d)
  else
    log_error "Extract pipeline failed"
    return 1
  fi
}

# Run transform pipeline. On success records today's UTC date in
# last_transform_run; on failure returns 1 so the next loop iteration within
# the scheduled hour retries.
run_transform() {
  log "Starting transform pipeline..."

  if materia pipeline run transform; then
    log "Transform pipeline completed successfully"
    last_transform_run=$(date -u +%Y-%m-%d)
  else
    log_error "Transform pipeline failed"
    return 1
  fi
}

# Main supervisor loop: never returns.
main() {
  log "Materia supervisor starting..."
  log "Extract schedule: daily at ${EXTRACT_SCHEDULE_HOUR}:00 UTC"
  log "Transform schedule: daily at ${TRANSFORM_SCHEDULE_HOUR}:00 UTC"
  log "Check interval: ${CHECK_INTERVAL}s"

  # Initial CLI check
  check_cli_update || log_error "Initial CLI check failed, continuing anyway"

  while true; do
    # Failures below are deliberately non-fatal: each function has already
    # logged its own error and the supervisor must keep running.
    check_cli_update || true

    # Check and run extract pipeline
    if should_run_extract; then
      run_extract || true
    fi

    # Check and run transform pipeline
    if should_run_transform; then
      run_transform || true
    fi

    sleep "$CHECK_INTERVAL"
  done
}

# Run main loop
main "$@"