fix(deploy): move router config write to after health check passes
The router had no profile, so it was always included in `up -d --wait`. Writing the new target's config BEFORE the wait caused the router to become unhealthy if the new slot failed, leaving it in a broken state for the next deploy attempt.

Now the router keeps its old config (pointing to the still-running old slot) during the health-check wait, so it stays healthy throughout. The config is written, and `nginx -s reload` is triggered, only after the new slot passes its health check. This is the correct blue-green pattern.

Also add `retries: 3` and `start_period: 10s` to the router health check for resilience against transient startup failures.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
42
deploy.sh
42
deploy.sh
@@ -35,7 +35,27 @@ echo "==> Running migrations..."
|
||||
$COMPOSE --profile "$TARGET" run --rm "${TARGET}-app" \
|
||||
python -m padelnomics.migrations.migrate
|
||||
|
||||
# ── Write router config (before starting, so nginx -t passes) ──
|
||||
# ── Start & health check ───────────────────────────────────
|
||||
# Router config is NOT written yet — router keeps old config so it stays
|
||||
# healthy while we wait for the new slot to pass its own health check.
|
||||
|
||||
echo "==> Starting $TARGET (waiting for health check)..."
|
||||
if ! $COMPOSE --profile "$TARGET" up -d --wait; then
|
||||
echo "!!! Health check failed — dumping logs"
|
||||
$COMPOSE logs --tail=100 2>&1 || true
|
||||
echo "!!! Rolling back"
|
||||
$COMPOSE stop "${TARGET}-app" "${TARGET}-worker" "${TARGET}-scheduler"
|
||||
LATEST=$($COMPOSE run --rm --entrypoint "" "${TARGET}-app" \
|
||||
sh -c "ls -t /app/data/app.db.pre-deploy-* 2>/dev/null | head -1")
|
||||
if [ -n "$LATEST" ]; then
|
||||
echo "==> Restoring database from ${LATEST}..."
|
||||
$COMPOSE run --rm --entrypoint "" "${TARGET}-app" \
|
||||
sh -c "cp '${LATEST}' /app/data/app.db"
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ── Write router config and reload (new slot is healthy) ────
|
||||
|
||||
echo "==> Writing router config for $TARGET..."
|
||||
mkdir -p "$(dirname "$ROUTER_CONF")"
|
||||
@@ -57,26 +77,6 @@ server {
|
||||
}
|
||||
NGINX
|
||||
|
||||
# ── Start & health check ───────────────────────────────────
|
||||
|
||||
echo "==> Starting $TARGET (waiting for health check)..."
|
||||
if ! $COMPOSE --profile "$TARGET" up -d --wait; then
|
||||
echo "!!! Health check failed — dumping logs"
|
||||
$COMPOSE logs "${TARGET}-app" --tail=100 || true
|
||||
echo "!!! Rolling back"
|
||||
$COMPOSE stop "${TARGET}-app" "${TARGET}-worker" "${TARGET}-scheduler"
|
||||
LATEST=$($COMPOSE run --rm --entrypoint "" "${TARGET}-app" \
|
||||
sh -c "ls -t /app/data/app.db.pre-deploy-* 2>/dev/null | head -1")
|
||||
if [ -n "$LATEST" ]; then
|
||||
echo "==> Restoring database from ${LATEST}..."
|
||||
$COMPOSE run --rm --entrypoint "" "${TARGET}-app" \
|
||||
sh -c "cp '${LATEST}' /app/data/app.db"
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ── Reload router to pick up new upstream ───────────────────
|
||||
|
||||
echo "==> Reloading router..."
|
||||
$COMPOSE exec router nginx -s reload
|
||||
|
||||
|
||||
@@ -14,6 +14,8 @@ services:
|
||||
test: ["CMD", "nginx", "-t"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
litestream:
|
||||
image: litestream/litestream:latest
|
||||
|
||||
Reference in New Issue
Block a user