Split the single lakehouse.duckdb into two files to eliminate the exclusive
write-lock conflict between SQLMesh (pipeline) and the Quart web app (reader):

  lakehouse.duckdb — SQLMesh exclusive (all pipeline layers)
  serving.duckdb — web app reads (serving tables only, atomically swapped)

Changes:

web/src/beanflows/analytics.py
- Replace persistent global _conn with per-thread connections (threading.local)
- Add _get_conn(): opens read_only=True on first call per thread, reopens
  automatically on inode change (~1μs os.stat) to pick up atomic file swaps
- Switch env var from DUCKDB_PATH → SERVING_DUCKDB_PATH
- Add module docstring documenting architecture + DuckLake migration path

web/src/beanflows/app.py
- Startup check: use SERVING_DUCKDB_PATH
- Health check: use _db_path instead of _conn

src/materia/export_serving.py (new)
- Reads all serving.* tables from lakehouse.duckdb (read_only)
- Writes to serving_new.duckdb, then os.rename → serving.duckdb (atomic)
- ~50 lines; runs after each SQLMesh transform

src/materia/pipelines.py
- Add export_serving pipeline entry (uv run python -c ...)

infra/supervisor/supervisor.sh
- Add SERVING_DUCKDB_PATH env var comment
- Add export step: uv run materia pipeline run export_serving

infra/supervisor/materia-supervisor.service
- Add Environment=SERVING_DUCKDB_PATH=/data/materia/serving.duckdb

infra/bootstrap_supervisor.sh
- Add SERVING_DUCKDB_PATH to .env template

web/.env.example + web/docker-compose.yml
- Document both env vars; switch web service to SERVING_DUCKDB_PATH

web/src/beanflows/dashboard/templates/settings.html
- Minor settings page fix from prior session

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
118 lines
3.4 KiB
Bash
Executable File
118 lines
3.4 KiB
Bash
Executable File
#!/bin/bash
#
# bootstrap_supervisor.sh - one-time setup of a Materia supervisor instance.
#
# Meant to be executed once, as root, on a newly provisioned supervisor.
#
# Usage:
#   Remotely (from CI/CD or a workstation):
#     ssh root@<supervisor_ip> 'bash -s' < infra/bootstrap_supervisor.sh
#
#   Directly on the supervisor:
#     curl -fsSL <url-to-this-script> | bash
#
# Required environment (each is checked further down; the run aborts when
# either one is empty or unset):
#   GITLAB_READ_TOKEN
#   PULUMI_ACCESS_TOKEN

# Strict mode: stop on the first failing command, treat unset variables as
# errors, and make a pipeline fail when any of its stages fails.
set -o errexit -o nounset -o pipefail

# Tell the operator up front which steps are about to run. The heredoc is
# quoted so the text is printed literally; the trailing empty line matches
# the blank line that closes the banner.
cat <<'EOF'
=== Materia Supervisor Bootstrap ===
This script will:
 1. Install dependencies (git, uv, esc)
 2. Clone the materia repository
 3. Setup systemd service
 4. Start the supervisor

EOF

# Refuse to continue without root privileges: later steps install packages
# with apt-get and write a unit file under /etc/systemd/system.
if (( EUID != 0 )); then
  echo "ERROR: This script must be run as root"
  exit 1
fi

# Configuration
#   REPO_DIR        - where the repository is checked out on the supervisor
#   GITLAB_PROJECT  - GitLab "<namespace>/<project>" path of the repository
REPO_DIR="/opt/materia"
GITLAB_PROJECT="deemanone/materia"

# The clone URL below embeds GITLAB_READ_TOKEN, so bail out early when it is
# missing. The token is expected to be managed in Pulumi ESC (beanflows/prod).
# NOTE(review): this check runs before the script loads secrets from Pulumi
# ESC, so the token must already be present in the calling environment -
# confirm that this ordering is intended.
if [[ -z "${GITLAB_READ_TOKEN:-}" ]]; then
  printf '%s\n' \
    "ERROR: GITLAB_READ_TOKEN environment variable not set" \
    "Please add it to Pulumi ESC (beanflows/prod) first"
  exit 1
fi

# HTTPS clone URL authenticated with the read token.
REPO_URL="https://gitlab-ci-token:${GITLAB_READ_TOKEN}@gitlab.com/${GITLAB_PROJECT}.git"

echo "--- Installing system dependencies ---"
# Both documented invocations ("ssh ... 'bash -s' < script" and
# "curl ... | bash") feed this script to bash on stdin. A child process that
# reads stdin would swallow the remainder of the script, and an interactive
# debconf prompt would hang an unattended run. Detach stdin and force the
# non-interactive debconf frontend for the package steps.
DEBIAN_FRONTEND=noninteractive apt-get update </dev/null
DEBIAN_FRONTEND=noninteractive apt-get install -y git curl python3-pip </dev/null

echo "--- Installing uv ---"
# Install uv only when it is not already resolvable on PATH.
if ! command -v uv &> /dev/null; then
  curl -LsSf https://astral.sh/uv/install.sh | sh

  # Current uv installers place the binary in ~/.local/bin, while older
  # releases used ~/.cargo/bin. Put both on PATH (for this run and for future
  # root shells) so the script works with either installer version.
  export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
  echo 'export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"' >> /root/.bashrc

  # Fail here with a clear message instead of at the later "uv sync" step.
  if ! command -v uv > /dev/null 2>&1; then
    echo "ERROR: uv was installed but is not on PATH" >&2
    exit 1
  fi
fi

echo "--- Installing Pulumi ESC ---"
# Skip the download when an `esc` command already resolves; otherwise run the
# installer and expose ~/.pulumi/bin to this run and to future root shells.
command -v esc > /dev/null 2>&1 || {
  curl -fsSL https://get.pulumi.com/esc/install.sh | sh
  export PATH="$HOME/.pulumi/bin:$PATH"
  printf '%s\n' 'export PATH="$HOME/.pulumi/bin:$PATH"' >> /root/.bashrc
}

echo "--- Setting up Pulumi ESC authentication ---"
# The access token must come from the caller's environment; without it the
# login below cannot be attempted, so print instructions and stop.
if [[ -z "${PULUMI_ACCESS_TOKEN:-}" ]]; then
  printf '%s\n' \
    "ERROR: PULUMI_ACCESS_TOKEN environment variable not set" \
    "Please set it before running this script:" \
    " export PULUMI_ACCESS_TOKEN=<your-token>"
  exit 1
fi

# Authenticate the esc CLI with the supplied token.
esc login --token "$PULUMI_ACCESS_TOKEN"

echo "--- Loading secrets from Pulumi ESC ---"
# Capture the generated shell code first. With a bare `eval $(...)`, a failing
# `esc env open` goes unnoticed: eval receives no arguments and returns 0, so
# the script would carry on without any secrets. Capturing lets the failure be
# detected, and quoting the eval argument keeps the generated code intact
# (no word splitting or glob expansion before it is evaluated).
if ! esc_exports="$(esc env open beanflows/prod --format shell)"; then
  echo "ERROR: failed to open Pulumi ESC environment beanflows/prod" >&2
  exit 1
fi
eval "$esc_exports"
unset esc_exports

echo "--- Cloning repository ---"
# First run: clone into REPO_DIR. Re-run: the directory already exists, so
# pull origin/master instead. Either way the script ends up inside REPO_DIR,
# which the following steps rely on as the working directory.
if [[ ! -d "$REPO_DIR" ]]; then
  git clone "$REPO_URL" "$REPO_DIR"
  cd "$REPO_DIR"
else
  echo "Repository already exists, pulling latest..."
  cd "$REPO_DIR"
  git pull origin master
fi

echo "--- Creating data directories ---"
# -p creates the whole /data/materia/landing/psd chain and is a no-op when
# the directories are already there.
mkdir -p -- /data/materia/landing/psd

echo "--- Installing Python dependencies ---"
# Runs in the current working directory, which the clone step left at the
# repository checkout.
uv sync

echo "--- Creating environment file ---"
# The file stores PULUMI_ACCESS_TOKEN, so it must not be readable by other
# users. Create (or replace) it empty with mode 600 *before* writing the
# secret; a plain redirect would use the default umask on first creation and
# would keep whatever looser mode an existing file already had.
install -m 600 /dev/null "$REPO_DIR/.env"

# Unquoted heredoc delimiter: ${PULUMI_ACCESS_TOKEN} is expanded on purpose.
# PATH lists /root/.local/bin as well as /root/.cargo/bin because the uv
# installer's target directory differs between uv releases.
cat > "$REPO_DIR/.env" <<EOF
# Environment variables for supervisor
# Loaded from Pulumi ESC: beanflows/prod
PULUMI_ACCESS_TOKEN=${PULUMI_ACCESS_TOKEN}
PATH=/root/.local/bin:/root/.cargo/bin:/root/.pulumi/bin:/usr/local/bin:/usr/bin:/bin
LANDING_DIR=/data/materia/landing
DUCKDB_PATH=/data/materia/lakehouse.duckdb
SERVING_DUCKDB_PATH=/data/materia/serving.duckdb
EOF

echo "--- Setting up systemd service ---"
# Install the unit file shipped in the repository.
cp "$REPO_DIR/infra/supervisor/materia-supervisor.service" /etc/systemd/system/materia-supervisor.service

echo "--- Enabling and starting service ---"
# Make systemd re-read unit files so it sees the copy made above, then
# enable the unit and start it, in that order.
systemctl daemon-reload
for action in enable start; do
  systemctl "$action" materia-supervisor
done

# Final summary: how to inspect the service, and which commit was deployed.
# Quoted heredoc: the static text is printed literally.
cat <<'EOF'

=== Bootstrap complete! ===

Supervisor is now running. Check status with:
 systemctl status materia-supervisor

View logs with:
 journalctl -u materia-supervisor -f

EOF
echo "Repository location: $REPO_DIR"
# `git -C` with a quoted path replaces the unquoted `cd $REPO_DIR && git ...`,
# which would be word-split or glob-expanded for an unusual path.
echo "Current commit: $(git -C "$REPO_DIR" rev-parse --short HEAD)"