fix: deploy locking, safe eval replacement, first-boot error handling, script hardening

- S4: Add Bitcoin readiness gate and container tracking with final summary
- S5: Replace eval "$DB_PASSWORDS" with safe case-based variable parsing
- S6: Add deploy locking with stale lock detection (30min timeout)
- S7: Deploy rollback was already implemented — verified the existing mechanism
- S8: Switch trust-archipelago-cert.sh to SSH key auth, sshpass as fallback
- S9: Pipe MariaDB SQL via stdin to avoid password in ps output
- S17: Add disk space pre-flight check (abort if >85% full)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dorian
2026-03-21 01:39:22 +00:00
parent 3b7d541224
commit 2f60ef44ea
3 changed files with 142 additions and 24 deletions

View File

@@ -75,6 +75,36 @@ if [ -n "$TAILSCALE_NODE" ]; then
exec "$SCRIPT_DIR/deploy-tailscale.sh" "$TAILSCALE_NODE"
fi
# Deploy locking — prevent concurrent deploys to the same target.
# The lock is a directory under /tmp whose name is derived from the part of
# TARGET_HOST after "user@". Whoever manages to create the directory holds
# the lock; it is removed again by the EXIT trap installed below.
TARGET_IP_FOR_LOCK="$(echo "$TARGET_HOST" | cut -d@ -f2)"
LOCK_DIR="/tmp/archipelago-deploy-${TARGET_IP_FOR_LOCK}.lock"
# Check for stale lock (older than 30 minutes)
if [ -d "$LOCK_DIR" ]; then
  # The lock's age is normally read from the pid file. If a previous run died
  # after mkdir but before the pid file was written, fall back to the
  # directory's own mtime so the lock can still expire instead of blocking
  # every future deploy.
  LOCK_STAMP="$LOCK_DIR/pid"
  [ -f "$LOCK_STAMP" ] || LOCK_STAMP="$LOCK_DIR"
  # macOS uses stat -f %m, Linux uses stat -c %Y
  LOCK_MTIME=$(stat -c %Y "$LOCK_STAMP" 2>/dev/null) \
    || LOCK_MTIME=$(stat -f %m "$LOCK_STAMP" 2>/dev/null) \
    || LOCK_MTIME=""
  case "$LOCK_MTIME" in
    '' | *[!0-9]*)
      # mtime could not be read (for example the lock was released while we
      # were inspecting it). Do not delete a lock whose age is unknown; the
      # mkdir below decides whether it is still held.
      ;;
    *)
      LOCK_AGE=$(( $(date +%s) - LOCK_MTIME ))
      if [ "$LOCK_AGE" -gt 1800 ]; then
        echo "$(timestamp) WARNING: Removing stale lock (${LOCK_AGE}s old)"
        rm -rf -- "$LOCK_DIR"
      fi
      ;;
  esac
fi
# mkdir is atomic — fails if directory already exists
if ! mkdir "$LOCK_DIR" 2>/dev/null; then
  echo "ERROR: Deploy already in progress for $TARGET_HOST (lock: $LOCK_DIR)"
  exit 1
fi
# Clean up lock on exit. The trap is installed before the pid file is written
# so that a failure in between cannot leave the lock directory behind.
cleanup_lock() { rm -rf "$LOCK_DIR"; }
trap cleanup_lock EXIT
echo $$ > "$LOCK_DIR/pid"
# Dry run mode: show what would be deployed without executing
if [[ "$DRY_RUN" == "true" ]]; then
echo "═══ DRY RUN MODE — no changes will be made ═══"
@@ -168,6 +198,13 @@ if ! ssh $SSH_OPTS -o ConnectTimeout=5 "$TARGET_HOST" "echo ok" >/dev/null 2>&1;
fi
echo " Connected."
# Disk space pre-flight — abort if target is dangerously full.
# Reads the usage percentage of / on the target: second-to-last column of the
# last df line, with the trailing "%" stripped. df -P requests the POSIX
# portable format so that column is the capacity figure regardless of the
# target's default df layout.
# SSH_OPTS is intentionally unquoted so it splits into separate ssh options.
DISK_PCT=$(ssh $SSH_OPTS "$TARGET_HOST" "df -P / | tail -1 | awk '{print \$(NF-1)}' | tr -d '%'" 2>/dev/null) || DISK_PCT=""
case "$DISK_PCT" in
  '' | *[!0-9]*)
    # The check is best-effort: if usage cannot be determined, say so rather
    # than skipping it silently, and carry on with the deploy.
    echo "WARNING: Could not determine disk usage on target — skipping disk space check"
    ;;
  *)
    if [ "$DISK_PCT" -gt 85 ]; then
      echo "ERROR: Target disk at ${DISK_PCT}% — need <85% for safe deploy. Free space and retry."
      exit 1
    fi
    ;;
esac
# Install prerequisites if missing (rsync for code sync, python3 for Claude API proxy)
progress "Checking prerequisites"
ssh $SSH_OPTS "$TARGET_HOST" '
@@ -940,7 +977,21 @@ MANIFEST_EOF
echo "FEDI_HASH=$(sudo cat "$SECRETS_DIR/fedimint-gateway-hash")"
fi
' 2>/dev/null)
# Safe variable parsing — never eval untrusted SSH output.
#
# _parse_db_passwords TEXT
#   TEXT is a newline-separated list of KEY=VALUE lines as printed by the
#   server. For each line whose KEY is one of the expected names, the matching
#   global shell variable is set to VALUE verbatim; nothing is ever executed.
#   Lines that are empty or have an empty key are skipped. Unknown keys are
#   reported with a warning and ignored.
#
# Each line is split on the FIRST "=" only, using parameter expansion.
# `IFS='=' read -r key value` is deliberately avoided: bash drops a single
# trailing "=" from the last field, which would silently truncate secrets
# that end in one "=" (e.g. base64 padding).
_parse_db_passwords() {
  local line key value
  while IFS= read -r line; do
    key="${line%%=*}"
    # Skip empty lines (and lines with nothing before the "=")
    [ -z "$key" ] && continue
    # A line without "=" carries a key but no value
    case "$line" in
      *=*) value="${line#*=}" ;;
      *) value="" ;;
    esac
    # Only allow expected variable names
    case "$key" in
      MEMPOOL_DB_PASS) MEMPOOL_DB_PASS="$value" ;;
      BTCPAY_DB_PASS) BTCPAY_DB_PASS="$value" ;;
      IMMICH_DB_PASS) IMMICH_DB_PASS="$value" ;;
      PENPOT_DB_PASS) PENPOT_DB_PASS="$value" ;;
      MYSQL_ROOT_PASS) MYSQL_ROOT_PASS="$value" ;;
      FEDI_HASH) FEDI_HASH="$value" ;;
      *) echo " WARNING: Ignoring unexpected variable from server: $key" ;;
    esac
  done <<< "$1"
}
_parse_db_passwords "${DB_PASSWORDS:-}"
# Fallback if hash not available
if [ -z "${FEDI_HASH:-}" ]; then
FEDI_HASH='$2y$10$t9YjjxkiktrlYvjajB/zgOMDnSNVg4HqrbDqh47u7Jf42whNdxNqC'