feat: landing zone backup to R2 via rclone + Litestream
Landing files (append-only JSON.gz) synced to R2 every 30 min via systemd timer + rclone. Extraction state DB (.state.sqlite) continuously replicated via Litestream (second DB entry). Auto-restore on container startup for both app.db and .state.sqlite. Reuses existing R2 bucket and credentials — no new env vars needed. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
20
infra/landing-backup/padelnomics-landing-backup.service
Normal file
20
infra/landing-backup/padelnomics-landing-backup.service
Normal file
@@ -0,0 +1,20 @@
|
||||
# Uploads the landing directory to Cloudflare R2 with rclone and exits when the
# transfer is done (Type=oneshot), so it is meant to be started by a timer.
[Unit]
Description=Padelnomics Landing Zone Backup to R2
After=network-online.target
Wants=network-online.target

[Service]
Type=oneshot
# Supplies the LITESTREAM_R2_* variables referenced in ExecStart below.
EnvironmentFile=/opt/padelnomics/.env
# Default source directory. Values from EnvironmentFile= take precedence over
# Environment=, so a LANDING_DIR entry in .env overrides this one.
Environment=LANDING_DIR=/data/padelnomics/landing
# "copy" instead of "sync": copy never deletes anything on the destination.
# With sync, an empty or freshly re-created LANDING_DIR would be mirrored to R2
# and erase the backup. Landing files are append-only, so there is nothing that
# legitimately needs deleting from the bucket.
#
# .state.sqlite* is excluded because that database is replicated separately by
# Litestream.
ExecStart=/usr/bin/rclone copy ${LANDING_DIR} :s3:${LITESTREAM_R2_BUCKET}/padelnomics/landing \
    --s3-provider Cloudflare \
    --s3-access-key-id ${LITESTREAM_R2_ACCESS_KEY_ID} \
    --s3-secret-access-key ${LITESTREAM_R2_SECRET_ACCESS_KEY} \
    --s3-endpoint https://${LITESTREAM_R2_ENDPOINT} \
    --s3-no-check-bucket \
    --exclude ".state.sqlite*"

StandardOutput=journal
StandardError=journal
SyslogIdentifier=padelnomics-landing-backup
|
||||
9
infra/landing-backup/padelnomics-landing-backup.timer
Normal file
9
infra/landing-backup/padelnomics-landing-backup.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
# Timer for the landing zone backup. No Unit= is set, so systemd activates the
# service with the same base name (padelnomics-landing-backup.service).
[Unit]
|
||||
Description=Sync landing zone to R2 every 30 minutes
|
||||
|
||||
[Timer]
|
||||
# First run: 5 minutes after the machine booted.
OnBootSec=5min
|
||||
# Subsequent runs: 30 minutes after the triggered unit was last activated.
OnUnitActiveSec=30min
|
||||
|
||||
[Install]
|
||||
# Hook into timers.target so that enabling the unit starts the timer at boot.
WantedBy=timers.target
|
||||
27
infra/restore_landing.sh
Executable file
27
infra/restore_landing.sh
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/bin/sh
# Restore landing zone files from R2.
#
# The extraction state DB (.state.sqlite) is restored automatically by
# the Litestream container on startup — this script handles the data files only.
#
# Requires: rclone, LITESTREAM_R2_* env vars (from /opt/padelnomics/.env)
#
# Environment:
#   LANDING_DIR                      restore destination
#                                    (default: /data/padelnomics/landing)
#   LITESTREAM_R2_BUCKET             bucket holding the backup
#   LITESTREAM_R2_ENDPOINT           R2 endpoint host, without the scheme
#   LITESTREAM_R2_ACCESS_KEY_ID      R2 credentials
#   LITESTREAM_R2_SECRET_ACCESS_KEY
#
# Usage (the variables must be *exported* to reach this script; a plain
# `source .env` only sets them in the calling shell, hence `set -a`):
#   set -a && . /opt/padelnomics/.env && set +a && sh infra/restore_landing.sh

set -eu

LANDING_DIR="${LANDING_DIR:-/data/padelnomics/landing}"
BUCKET_PREFIX="${LITESTREAM_R2_BUCKET:?LITESTREAM_R2_BUCKET not set}/padelnomics/landing"
R2_ENDPOINT_URL="https://${LITESTREAM_R2_ENDPOINT:?not set}"

# Hand the credentials to rclone through its environment instead of argv, so
# they are not visible in `ps` output while the transfer runs. The `:?` checks
# also make the script fail before any output if a credential is missing.
RCLONE_S3_ACCESS_KEY_ID="${LITESTREAM_R2_ACCESS_KEY_ID:?not set}"
RCLONE_S3_SECRET_ACCESS_KEY="${LITESTREAM_R2_SECRET_ACCESS_KEY:?not set}"
export RCLONE_S3_ACCESS_KEY_ID RCLONE_S3_SECRET_ACCESS_KEY

echo "==> Restoring landing zone from R2 to ${LANDING_DIR}..."

# "copy" instead of "sync": sync would delete every local file that is not in
# the bucket, e.g. data written since the last backup run. copy only adds or
# updates files. .state.sqlite* is left alone because Litestream owns it.
rclone copy ":s3:${BUCKET_PREFIX}" "$LANDING_DIR" \
  --s3-provider Cloudflare \
  --s3-endpoint "$R2_ENDPOINT_URL" \
  --s3-no-check-bucket \
  --exclude ".state.sqlite*" \
  --progress

echo "==> Landing zone restored to ${LANDING_DIR}"
|
||||
@@ -38,6 +38,32 @@ else
|
||||
echo "Deploy key already exists, skipping"
|
||||
fi
|
||||
|
||||
# Install rclone (landing zone backup to R2)
if ! command -v rclone >/dev/null 2>&1; then
    echo "Installing rclone..."
    curl -fsSL https://rclone.org/install.sh | bash
    # Unless pipefail is on, the pipeline reports bash's status, and bash exits
    # 0 on empty input. Re-check for rclone so a failed download is reported
    # here rather than by the first failed backup run.
    if ! command -v rclone >/dev/null 2>&1; then
        echo "rclone installation failed" >&2
        exit 1
    fi
    # The first line of `rclone version` is "rclone vX.Y.Z". The `--check`
    # variant queries rclone.org and prints a "yours:/latest:" table instead.
    echo "Installed $(rclone version | head -n 1)"
else
    echo "rclone already installed, skipping"
fi
|
||||
|
||||
# Make sure the landing data directory exists (no-op when it already does).
mkdir -p /data/padelnomics/landing
echo "Created /data/padelnomics/landing"

# Copy the landing backup service and timer units into systemd's unit
# directory, reload systemd so it picks them up, then enable the timer and
# start it immediately.
for landing_unit in service timer; do
    cp "$APP_DIR/infra/landing-backup/padelnomics-landing-backup.$landing_unit" /etc/systemd/system/
done
systemctl daemon-reload
systemctl enable --now padelnomics-landing-backup.timer
echo "Enabled landing backup timer (every 30 min)"
|
||||
|
||||
# Install and enable supervisor service:
# copy the unit file from the app checkout into systemd's unit directory.
|
||||
cp "$APP_DIR/infra/supervisor/padelnomics-supervisor.service" /etc/systemd/system/
|
||||
# Reload systemd so it reads the unit file that was just copied.
systemctl daemon-reload
|
||||
# --now starts the service immediately in addition to enabling it.
systemctl enable --now padelnomics-supervisor.service
|
||||
echo "Enabled supervisor service"
|
||||
|
||||
echo ""
|
||||
echo "=== Next steps ==="
|
||||
echo "1. Add this deploy key to GitLab (Settings → Repository → Deploy Keys, read-only):"
|
||||
|
||||
Reference in New Issue
Block a user