diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md
index a135b10..208b859 100644
--- a/.claude/CLAUDE.md
+++ b/.claude/CLAUDE.md
@@ -98,6 +98,27 @@ analytics.duckdb ← serving tables only, web app read-only
 └── serving.* ← atomically replaced by export_serving.py
 ```
 
+## Backup & disaster recovery
+
+| Data | Tool | Target | Frequency |
+|------|------|--------|-----------|
+| `app.db` (auth, billing) | Litestream | R2 `padelnomics/app.db` | Continuous (WAL) |
+| `.state.sqlite` (extraction state) | Litestream | R2 `padelnomics/state.sqlite` | Continuous (WAL) |
+| `data/landing/` (JSON.gz files) | rclone copy | R2 `padelnomics/landing/` | Every 30 min (systemd timer) |
+| `lakehouse.duckdb`, `analytics.duckdb` | N/A (derived) | Re-run pipeline | On demand |
+
+Recovery:
+```bash
+# App database (auto-restored by Litestream container on startup)
+litestream restore -config /etc/litestream.yml /app/data/app.db
+
+# Extraction state (auto-restored by Litestream container on startup)
+litestream restore -config /etc/litestream.yml /data/landing/.state.sqlite
+
+# Landing zone files (set -a exports the .env variables to the script)
+set -a && . /opt/padelnomics/.env && set +a && bash infra/restore_landing.sh
+```
+
 ## Environment variables
 
 | Variable | Default | Description |
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8bb0089..e07688d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 ## [Unreleased]
 
 ### Added
+- **Landing zone backup to R2** — append-only landing files (`data/landing/*.json.gz`)
+  copied to Cloudflare R2 every 30 minutes via systemd timer + rclone; extraction state
+  DB (`.state.sqlite`) continuously replicated via Litestream (second DB entry in existing
+  config); auto-restore on container startup for both `app.db` and `.state.sqlite`;
+  `infra/restore_landing.sh` script for disaster recovery of landing files;
+  `infra/landing-backup/` systemd service + timer units; rclone installation added to
+  `infra/setup_server.sh`; reuses existing R2 bucket and credentials (no new env vars)
 - **Admin Email Hub** (`/admin/emails`) — full email management dashboard with:
   sent log (filterable by type/event/search, HTMX partial updates), email detail with
   Resend API enrichment for HTML preview, inbound inbox with unread badges
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index ada76a7..a7e4023 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -31,11 +31,17 @@ services:
           litestream restore -config /etc/litestream.yml /app/data/app.db \
             || echo "==> No backup found, starting fresh"
         fi
+        if [ ! -f /data/landing/.state.sqlite ]; then
+          echo "==> No state DB found, restoring from R2..."
+          litestream restore -config /etc/litestream.yml /data/landing/.state.sqlite \
+            || echo "==> No state backup found, starting fresh"
+        fi
         exec litestream replicate -config /etc/litestream.yml
     env_file: ./.env
     volumes:
       - app-data:/app/data
       - ./litestream.yml:/etc/litestream.yml:ro
+      - /data/padelnomics/landing:/data/landing
     healthcheck:
       test: ["CMD-SHELL", "kill -0 1"]
       interval: 5s
diff --git a/infra/landing-backup/padelnomics-landing-backup.service b/infra/landing-backup/padelnomics-landing-backup.service
new file mode 100644
index 0000000..abc77fa
--- /dev/null
+++ b/infra/landing-backup/padelnomics-landing-backup.service
@@ -0,0 +1,25 @@
+[Unit]
+Description=Padelnomics Landing Zone Backup to R2
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=oneshot
+EnvironmentFile=/opt/padelnomics/.env
+Environment=LANDING_DIR=/data/padelnomics/landing
+# `copy`, not `sync`: landing files are append-only, and `sync` would mirror an
+# empty or partially lost landing dir to R2, deleting the backup it should protect.
+# Excluded: the state DB and Litestream's sibling `*-litestream/` shadow-WAL dir
+# (both are replicated by Litestream, not rclone).
+ExecStart=/usr/bin/rclone copy ${LANDING_DIR} :s3:${LITESTREAM_R2_BUCKET}/padelnomics/landing \
+    --s3-provider Cloudflare \
+    --s3-access-key-id ${LITESTREAM_R2_ACCESS_KEY_ID} \
+    --s3-secret-access-key ${LITESTREAM_R2_SECRET_ACCESS_KEY} \
+    --s3-endpoint https://${LITESTREAM_R2_ENDPOINT} \
+    --s3-no-check-bucket \
+    --exclude ".state.sqlite*" \
+    --exclude "*-litestream/**"
+
+StandardOutput=journal
+StandardError=journal
+SyslogIdentifier=padelnomics-landing-backup
diff --git a/infra/landing-backup/padelnomics-landing-backup.timer b/infra/landing-backup/padelnomics-landing-backup.timer
new file mode 100644
index 0000000..a84a220
--- /dev/null
+++ b/infra/landing-backup/padelnomics-landing-backup.timer
@@ -0,0 +1,9 @@
+[Unit]
+Description=Sync landing zone to R2 every 30 minutes
+
+[Timer]
+OnBootSec=5min
+OnUnitActiveSec=30min
+
+[Install]
+WantedBy=timers.target
diff --git a/infra/restore_landing.sh b/infra/restore_landing.sh
new file mode 100755
index 0000000..e15f295
--- /dev/null
+++ b/infra/restore_landing.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# Restore landing zone files from R2.
+# The extraction state DB (.state.sqlite) is restored automatically by
+# the Litestream container on startup — this script handles the data files only.
+#
+# Uses `rclone copy` rather than `sync`, so local files that are not (yet) in
+# R2 are left in place instead of being deleted.
+#
+# Requires: rclone, LITESTREAM_R2_* env vars (from /opt/padelnomics/.env)
+#
+# Usage (`set -a` exports the variables; a bare `source` would not pass them on):
+#   set -a && . /opt/padelnomics/.env && set +a && bash infra/restore_landing.sh
+
+set -eu
+
+LANDING_DIR="${LANDING_DIR:-/data/padelnomics/landing}"
+BUCKET_PREFIX="${LITESTREAM_R2_BUCKET:?LITESTREAM_R2_BUCKET not set}/padelnomics/landing"
+
+echo "==> Restoring landing zone from R2 to ${LANDING_DIR}..."
+
+# Excludes mirror the backup unit: state DB and Litestream shadow-WAL dir are
+# handled by Litestream, not rclone.
+rclone copy ":s3:${BUCKET_PREFIX}" "$LANDING_DIR" \
+    --s3-provider Cloudflare \
+    --s3-access-key-id "${LITESTREAM_R2_ACCESS_KEY_ID:?not set}" \
+    --s3-secret-access-key "${LITESTREAM_R2_SECRET_ACCESS_KEY:?not set}" \
+    --s3-endpoint "https://${LITESTREAM_R2_ENDPOINT:?not set}" \
+    --s3-no-check-bucket \
+    --exclude ".state.sqlite*" \
+    --exclude "*-litestream/**" \
+    --progress
+
+echo "==> Landing zone restored to ${LANDING_DIR}"
diff --git a/infra/setup_server.sh b/infra/setup_server.sh
index cf680f7..20417b7 100644
--- a/infra/setup_server.sh
+++ b/infra/setup_server.sh
@@ -38,6 +38,32 @@ else
     echo "Deploy key already exists, skipping"
 fi
 
+# Install rclone (landing zone backup to R2)
+if ! command -v rclone &>/dev/null; then
+    echo "Installing rclone..."
+    curl -fsSL https://rclone.org/install.sh | bash
+    echo "Installed $(rclone version | head -1)"
+else
+    echo "rclone already installed, skipping"
+fi
+
+# Create landing data directory
+mkdir -p /data/padelnomics/landing
+echo "Created /data/padelnomics/landing"
+
+# Install and enable landing backup timer
+cp "$APP_DIR/infra/landing-backup/padelnomics-landing-backup.service" /etc/systemd/system/
+cp "$APP_DIR/infra/landing-backup/padelnomics-landing-backup.timer" /etc/systemd/system/
+systemctl daemon-reload
+systemctl enable --now padelnomics-landing-backup.timer
+echo "Enabled landing backup timer (every 30 min)"
+
+# Install and enable supervisor service
+cp "$APP_DIR/infra/supervisor/padelnomics-supervisor.service" /etc/systemd/system/
+systemctl daemon-reload
+systemctl enable --now padelnomics-supervisor.service
+echo "Enabled supervisor service"
+
 echo ""
 echo "=== Next steps ==="
 echo "1. Add this deploy key to GitLab (Settings → Repository → Deploy Keys, read-only):"
diff --git a/litestream.yml b/litestream.yml
index 3b948e5..ec6366e 100644
--- a/litestream.yml
+++ b/litestream.yml
@@ -6,9 +6,12 @@
 #   LITESTREAM_R2_SECRET_ACCESS_KEY
 #   LITESTREAM_R2_ENDPOINT e.g. .r2.cloudflarestorage.com
 #
-# Recovery:
+# Recovery (app database):
 #   litestream restore -config /etc/litestream.yml /app/data/app.db
 #   litestream restore -config /etc/litestream.yml -timestamp "2026-01-15T12:00:00Z" /app/data/app.db
+#
+# Recovery (extraction state):
+#   litestream restore -config /etc/litestream.yml /data/landing/.state.sqlite
 
 dbs:
   - path: /app/data/app.db
@@ -19,3 +22,12 @@ dbs:
         endpoint: https://${LITESTREAM_R2_ENDPOINT}
         retention: 8760h
         snapshot-interval: 6h
+
+  - path: /data/landing/.state.sqlite
+    replicas:
+      - url: s3://${LITESTREAM_R2_BUCKET}/padelnomics/state.sqlite
+        access-key-id: ${LITESTREAM_R2_ACCESS_KEY_ID}
+        secret-access-key: ${LITESTREAM_R2_SECRET_ACCESS_KEY}
+        endpoint: https://${LITESTREAM_R2_ENDPOINT}
+        retention: 8760h
+        snapshot-interval: 24h