feat: landing zone backup to R2 via rclone + Litestream

Landing files (append-only JSON.gz) synced to R2 every 30 min via
systemd timer + rclone. Extraction state DB (.state.sqlite) continuously
replicated via Litestream (second DB entry). Auto-restore on container
startup for both app.db and .state.sqlite. Reuses existing R2 bucket
and credentials — no new env vars needed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-23 14:06:16 +01:00
parent 5b6c4182f7
commit 76814dade7
8 changed files with 131 additions and 1 deletions

View File

@@ -0,0 +1,20 @@
# Oneshot job that uploads the landing zone directory to the R2 bucket with
# rclone. Intended to be triggered periodically by a timer unit.
[Unit]
Description=Padelnomics Landing Zone Backup to R2
After=network-online.target
Wants=network-online.target

[Service]
Type=oneshot
# Supplies the LITESTREAM_R2_* variables (bucket, endpoint, credentials)
# referenced in ExecStart below.
EnvironmentFile=/opt/padelnomics/.env
Environment=LANDING_DIR=/data/padelnomics/landing
# "copy" rather than "sync": sync deletes remote objects that are missing
# locally, so a run against an empty or freshly provisioned landing directory
# would erase the backup. Landing files are append-only, so an additive copy
# is sufficient and can never remove data from R2.
# The extraction state DB (.state.sqlite*) is excluded from this upload.
# NOTE(review): the R2 credentials are passed as command-line arguments and are
# therefore visible in the process list while the job runs.
ExecStart=/usr/bin/rclone copy ${LANDING_DIR} :s3:${LITESTREAM_R2_BUCKET}/padelnomics/landing \
    --s3-provider Cloudflare \
    --s3-access-key-id ${LITESTREAM_R2_ACCESS_KEY_ID} \
    --s3-secret-access-key ${LITESTREAM_R2_SECRET_ACCESS_KEY} \
    --s3-endpoint https://${LITESTREAM_R2_ENDPOINT} \
    --s3-no-check-bucket \
    --exclude ".state.sqlite*"
StandardOutput=journal
StandardError=journal
SyslogIdentifier=padelnomics-landing-backup

View File

@@ -0,0 +1,9 @@
# Timer that periodically triggers the landing zone backup.
# No Unit= is given, so systemd activates the service unit that shares this
# timer's file name.
[Unit]
Description=Sync landing zone to R2 every 30 minutes
[Timer]
# First run: 5 minutes after the machine boots.
OnBootSec=5min
# Subsequent runs: 30 minutes after the triggered unit was last activated.
OnUnitActiveSec=30min
[Install]
# Started together with the other timers when enabled.
WantedBy=timers.target

27
infra/restore_landing.sh Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/sh
# Restore landing zone files from R2.
# The extraction state DB (.state.sqlite) is restored automatically by
# the Litestream container on startup — this script handles the data files only.
#
# Requires: rclone, LITESTREAM_R2_* env vars (from /opt/padelnomics/.env)
#
# Usage:
#   set -a && . /opt/padelnomics/.env && set +a && sh infra/restore_landing.sh
#
# `set -a` exports everything the .env defines. A bare `source` only sets
# shell-local variables, which this script (a child process) cannot see —
# assuming the .env holds plain KEY=VALUE lines without `export`.
#
# WARNING: this uses `rclone sync`, so files in LANDING_DIR that do not exist
# in the R2 backup are deleted (files matching the exclude pattern are kept).
set -eu

LANDING_DIR="${LANDING_DIR:-/data/padelnomics/landing}"
BUCKET_PREFIX="${LITESTREAM_R2_BUCKET:?LITESTREAM_R2_BUCKET not set}/padelnomics/landing"
R2_ENDPOINT="${LITESTREAM_R2_ENDPOINT:?LITESTREAM_R2_ENDPOINT not set}"

# Hand the credentials to rclone through its RCLONE_S3_* environment variables
# instead of command-line flags, so the secret never shows up in the process list.
RCLONE_S3_ACCESS_KEY_ID="${LITESTREAM_R2_ACCESS_KEY_ID:?LITESTREAM_R2_ACCESS_KEY_ID not set}"
RCLONE_S3_SECRET_ACCESS_KEY="${LITESTREAM_R2_SECRET_ACCESS_KEY:?LITESTREAM_R2_SECRET_ACCESS_KEY not set}"
export RCLONE_S3_ACCESS_KEY_ID RCLONE_S3_SECRET_ACCESS_KEY

# Fail with a clear message instead of a generic "not found" from the shell.
if ! command -v rclone >/dev/null 2>&1; then
    echo "rclone not found in PATH" >&2
    exit 1
fi

echo "==> Restoring landing zone from R2 to ${LANDING_DIR}..."
rclone sync ":s3:${BUCKET_PREFIX}" "$LANDING_DIR" \
    --s3-provider Cloudflare \
    --s3-endpoint "https://${R2_ENDPOINT}" \
    --s3-no-check-bucket \
    --exclude ".state.sqlite*" \
    --progress
echo "==> Landing zone restored to ${LANDING_DIR}"

View File

@@ -38,6 +38,32 @@ else
echo "Deploy key already exists, skipping"
fi
# Install rclone (landing zone backup to R2)
# The presence check uses POSIX redirection: under a plain `sh`, `&>/dev/null`
# would background the command and the test would always report success.
if ! command -v rclone >/dev/null 2>&1; then
    echo "Installing rclone..."
    # NOTE(review): this pipes a remote script straight into bash; consider a
    # pinned package or a checksum-verified download.
    curl -fsSL https://rclone.org/install.sh | bash
    # Plain `rclone version` prints "rclone vX.Y.Z" on its first line; with
    # --check it contacts the network and prints "yours: ..." instead.
    echo "Installed $(rclone version | head -1)"
else
    echo "rclone already installed, skipping"
fi
# Landing zone directory (created along with any missing parents).
mkdir -p /data/padelnomics/landing
echo "Created /data/padelnomics/landing"

# Landing backup: copy the service and timer units into place, make systemd
# re-read its unit files, then enable the timer and start it right away.
cp "$APP_DIR/infra/landing-backup/padelnomics-landing-backup.service" \
    "$APP_DIR/infra/landing-backup/padelnomics-landing-backup.timer" \
    /etc/systemd/system/
systemctl daemon-reload
systemctl enable --now padelnomics-landing-backup.timer
echo "Enabled landing backup timer (every 30 min)"

# Supervisor: same procedure for its single service unit.
cp "$APP_DIR/infra/supervisor/padelnomics-supervisor.service" /etc/systemd/system/
systemctl daemon-reload
systemctl enable --now padelnomics-supervisor.service
echo "Enabled supervisor service"
echo ""
echo "=== Next steps ==="
echo "1. Add this deploy key to GitLab (Settings → Repository → Deploy Keys, read-only):"