feat(orchestrator): complete container migration and release hardening

This commit is contained in:
archipelago
2026-04-28 15:00:58 -04:00
parent ce39430b33
commit 43de3b73b2
94 changed files with 5034 additions and 1003 deletions

View File

@@ -41,6 +41,11 @@ detect_environment() {
[ "$TOTAL_MEM_MB" -lt 12000 ] && LOW_MEM=true
HOST_IP=$(hostname -I 2>/dev/null | awk '{print $1}')
HOST_IP=${HOST_IP:-127.0.0.1}
# Stable mDNS hostname for URLs that get baked into federation/consensus data.
# Survives DHCP churn and reinstalls-on-different-IP (which $HOST_IP does not).
# Requires avahi-daemon (shipped on all Archipelago nodes).
#
# The ".local" suffix is appended only when `hostname` actually returned
# something; otherwise the value would be the bare string ".local", which is
# non-empty and would defeat the ":-" default below.
HOST_MDNS="$(hostname 2>/dev/null)"
HOST_MDNS="${HOST_MDNS:+${HOST_MDNS}.local}"
HOST_MDNS="${HOST_MDNS:-archipelago.local}"
# Secrets
SECRETS_DIR="/var/lib/archipelago/secrets"
@@ -50,6 +55,10 @@ detect_environment() {
BTCPAY_DB_PASS=$(cat "$SECRETS_DIR/btcpay-db-password" 2>/dev/null || echo "")
MYSQL_ROOT_PASS=$(cat "$SECRETS_DIR/mysql-root-db-password" 2>/dev/null || echo "")
FEDI_HASH=$(cat "$SECRETS_DIR/fedimint-gateway-hash" 2>/dev/null || echo "")
# Escape $ so SPEC_ENTRYPOINT survives eval in reconcile-containers.sh:build_run_cmd.
# bcrypt hashes have the form $2y$10$... and get mangled if $2 and $10 are
# interpolated as positional args at eval time.
FEDI_HASH="${FEDI_HASH//\$/\\\$}"
}
# ── Spec variables ────────────────────────────────────────────────────
@@ -274,7 +283,7 @@ load_spec_fedimint() {
SPEC_VOLUMES="/var/lib/archipelago/fedimint:/data"
SPEC_MEMORY="$(mem_limit fedimint)"
SPEC_HEALTH_CMD="curl -sf http://localhost:8175/ || exit 1"
SPEC_ENV="FM_DATA_DIR=/data FM_BITCOIND_USERNAME=$BITCOIN_RPC_USER FM_BITCOIND_PASSWORD=$BITCOIN_RPC_PASS FM_BITCOIN_NETWORK=bitcoin FM_BIND_P2P=0.0.0.0:8173 FM_BIND_API=0.0.0.0:8174 FM_BIND_UI=0.0.0.0:8175 FM_P2P_URL=fedimint://$HOST_IP:8173 FM_API_URL=ws://$HOST_IP:8174 FM_BITCOIND_URL=http://$HOST_IP:8332"
SPEC_ENV="FM_DATA_DIR=/data FM_BITCOIND_USERNAME=$BITCOIN_RPC_USER FM_BITCOIND_PASSWORD=$BITCOIN_RPC_PASS FM_BITCOIN_NETWORK=bitcoin FM_BIND_P2P=0.0.0.0:8173 FM_BIND_API=0.0.0.0:8174 FM_BIND_UI=0.0.0.0:8175 FM_P2P_URL=fedimint://$HOST_MDNS:8173 FM_API_URL=ws://$HOST_MDNS:8174 FM_BITCOIND_URL=http://bitcoin-knots:8332"
SPEC_TIER="2"
SPEC_DATA_DIR="/var/lib/archipelago/fedimint"
SPEC_DEPENDS="bitcoin-knots"
@@ -299,10 +308,10 @@ load_spec_fedimint-gateway() {
local LND_MAC=/var/lib/archipelago/lnd/data/chain/bitcoin/mainnet/admin.macaroon
if [ -f "$LND_CERT" ] && [ -f "$LND_MAC" ]; then
SPEC_VOLUMES="$SPEC_VOLUMES $LND_CERT:/lnd/tls.cert:ro $LND_MAC:/lnd/admin.macaroon:ro"
SPEC_ENTRYPOINT="gatewayd --data-dir /data --listen 0.0.0.0:8176 --bcrypt-password-hash $FEDI_HASH --network bitcoin --bitcoind-url http://$HOST_IP:8332 --bitcoind-username $BITCOIN_RPC_USER --bitcoind-password $BITCOIN_RPC_PASS lnd --lnd-rpc-host $HOST_IP:10009 --lnd-tls-cert /lnd/tls.cert --lnd-macaroon /lnd/admin.macaroon"
SPEC_ENTRYPOINT="gatewayd --data-dir /data --listen 0.0.0.0:8176 --bcrypt-password-hash $FEDI_HASH --network bitcoin --bitcoind-url http://bitcoin-knots:8332 --bitcoind-username $BITCOIN_RPC_USER --bitcoind-password $BITCOIN_RPC_PASS lnd --lnd-rpc-host lnd:10009 --lnd-tls-cert /lnd/tls.cert --lnd-macaroon /lnd/admin.macaroon"
else
SPEC_PORTS="8176:8176 9737:9737"
SPEC_ENTRYPOINT="gatewayd --data-dir /data --listen 0.0.0.0:8176 --bcrypt-password-hash $FEDI_HASH --network bitcoin --bitcoind-url http://$HOST_IP:8332 --bitcoind-username $BITCOIN_RPC_USER --bitcoind-password $BITCOIN_RPC_PASS ldk --ldk-lightning-port 9737 --ldk-alias archipelago-gateway"
SPEC_ENTRYPOINT="gatewayd --data-dir /data --listen 0.0.0.0:8176 --bcrypt-password-hash $FEDI_HASH --network bitcoin --bitcoind-url http://bitcoin-knots:8332 --bitcoind-username $BITCOIN_RPC_USER --bitcoind-password $BITCOIN_RPC_PASS ldk --ldk-lightning-port 9737 --ldk-alias archipelago-gateway"
fi
}
@@ -468,7 +477,15 @@ load_spec_filebrowser() {
SPEC_HEALTH_CMD="wget -q --spider http://localhost:80/health || exit 1"
SPEC_TIER="3"
SPEC_DATA_DIR="/var/lib/archipelago/filebrowser"
SPEC_CAPS=""
SPEC_DATA_UID="100000:100000"
# first-boot-containers.sh writes /data/.filebrowser.json (see filebrowser
# creation block at ~line 1128). Config path is required or filebrowser
# opens /database.db in CWD and fails with permission denied.
SPEC_CUSTOM_ARGS="--config /data/.filebrowser.json"
# Needs default caps (CHOWN FOWNER SETUID SETGID DAC_OVERRIDE) from reset_spec
# for rootless userns-root to write /data/filebrowser.db, plus NET_BIND_SERVICE
# to listen on port 80.
SPEC_CAPS="CHOWN FOWNER SETUID SETGID DAC_OVERRIDE NET_BIND_SERVICE"
SPEC_OPTIONAL="true"
}

View File

@@ -168,6 +168,13 @@ fi
# First address reported by `hostname -I`; loopback when none is reported.
TARGET_IP=$(hostname -I 2>/dev/null | awk '{print $1}')
TARGET_IP="${TARGET_IP:-127.0.0.1}"
# Stable mDNS hostname for federation/consensus URLs (survives DHCP / reinstalls).
# Falls back to $TARGET_IP when `hostname` yields nothing. Whether avahi is
# actually running is NOT probed here; "<hostname>.local" is assumed resolvable.
HOST_MDNS="$(hostname 2>/dev/null)"
if [ -n "$HOST_MDNS" ]; then
  HOST_MDNS="${HOST_MDNS}.local"
else
  HOST_MDNS="$TARGET_IP"
fi
# Map host.containers.internal to the rootless-podman host gateway.
# Podman 4.4+ supports the magic string "host-gateway" which resolves to
# the correct in-container-network gateway IP at container start. We used
@@ -916,7 +923,7 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -q fedimint; then
-e FM_DATA_DIR=/data -e "FM_BITCOIND_USERNAME=$BTC_RPC_USER" -e "FM_BITCOIND_PASSWORD=$BTC_RPC_PASS" \
-e FM_BITCOIN_NETWORK=bitcoin -e FM_BIND_P2P=0.0.0.0:8173 \
-e FM_BIND_API=0.0.0.0:8174 -e FM_BIND_UI=0.0.0.0:8175 \
-e FM_P2P_URL=fedimint://"$TARGET_IP":8173 -e FM_API_URL=ws://"$TARGET_IP":8174 \
-e FM_P2P_URL=fedimint://"$HOST_MDNS":8173 -e FM_API_URL=ws://"$HOST_MDNS":8174 \
-e "FM_BITCOIND_URL=http://$BTC_HOST:$BTC_PORT" \
"$FEDIMINT_IMAGE" 2>>"$LOG" || true
fi
@@ -945,7 +952,7 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -q fedimint-gateway; th
--bcrypt-password-hash "$FEDI_HASH" \
--network bitcoin --bitcoind-url "http://$BTC_HOST:$BTC_PORT" \
--bitcoind-username "$BTC_RPC_USER" --bitcoind-password "$BTC_RPC_PASS" \
lnd --lnd-rpc-host "$TARGET_IP":10009 --lnd-tls-cert /lnd/tls.cert --lnd-macaroon /lnd/admin.macaroon 2>>"$LOG" || true
lnd --lnd-rpc-host lnd:10009 --lnd-tls-cert /lnd/tls.cert --lnd-macaroon /lnd/admin.macaroon 2>>"$LOG" || true
else
log " No LND found — using ldk (built-in Lightning)"
$DOCKER run -d --name fedimint-gateway --restart unless-stopped \

View File

@@ -10,10 +10,25 @@ location /app/grafana/ {
proxy_hide_header X-Frame-Options;
proxy_hide_header Content-Security-Policy;
}
# Exact-match location: a request for the bare /app/uptime-kuma/ prefix is
# answered with a temporary (302) redirect to the dashboard path.
location = /app/uptime-kuma/ {
return 302 /app/uptime-kuma/dashboard;
}
# Reverse proxy for everything under /app/uptime-kuma/.
location /app/uptime-kuma/ {
    # Exactly one proxy_pass per location: nginx refuses to load a config that
    # repeats it. Port 3002 is the upstream here; 3001 is used by /app/gitea/.
    # The trailing slash replaces the matched /app/uptime-kuma/ prefix with "/".
    proxy_pass http://127.0.0.1:3002/;
    proxy_http_version 1.1;
    proxy_set_header Host $host;
    proxy_set_header X-Forwarded-Prefix /app/uptime-kuma;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    # Rewrite upstream redirects ("Location: /...") back under the sub-path.
    proxy_redirect / /app/uptime-kuma/;
    # Drop upstream framing/CSP headers before the response reaches the client.
    proxy_hide_header X-Frame-Options;
    proxy_hide_header Content-Security-Policy;
}
location /app/gitea/ {
proxy_pass http://127.0.0.1:3001/;
proxy_http_version 1.1;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;

View File

@@ -94,11 +94,16 @@ is_user_stopped() {
# ── Inspection helpers ───────────────────────────────────────────────
# Report whether a container with exactly the given name exists, in any state.
# Arguments: $1 - container name (matched literally against a whole line)
# Globals:   PODMAN (read) - container CLI to invoke
# Returns:   0 if the name is listed by `ps -a`, non-zero otherwise
container_exists() {
  local names
  # Capture the full listing first so the producer never writes into a pipe
  # that `grep -q` has already closed (SIGPIPE → failure under pipefail).
  names=$($PODMAN ps -a --format '{{.Names}}' 2>/dev/null)
  # -F: the name is a literal string, not a regex ("a.b" must not match "a-b").
  # --: a name beginning with "-" is not parsed as an option.
  grep -qxF -- "$1" <<<"$names"
}
# Report whether a container with exactly the given name is currently running.
# Arguments: $1 - container name (matched literally against a whole line)
# Globals:   PODMAN (read) - container CLI to invoke
# Returns:   0 if the name is listed by `ps`, non-zero otherwise
container_running() {
  local names
  # Capture the listing up front; matching from a here-string means no
  # producer process can be killed by SIGPIPE when `grep -q` exits early.
  names=$($PODMAN ps --format '{{.Names}}' 2>/dev/null)
  # -F keeps regex metacharacters in the name literal; -- guards a leading "-".
  grep -qxF -- "$1" <<<"$names"
}
container_image() {
@@ -117,8 +122,35 @@ container_memory() {
$PODMAN inspect "$1" --format '{{.HostConfig.Memory}}' 2>/dev/null
}
# Print the value a container (running or stopped) holds for one env variable.
# Arguments: $1 - container name, $2 - environment variable name
# Globals:   PODMAN (read) - container CLI to invoke
# Outputs:   the value on stdout; nothing at all when the variable is absent
container_env_val() {
  local ctr="$1"
  local wanted="$2"
  local fmt='{{range .Config.Env}}{{println .}}{{end}}'

  # One KEY=VALUE entry per line; the first line whose key equals the wanted
  # name wins. Only the leading "KEY=" is stripped, so "=" inside the value
  # is preserved.
  $PODMAN inspect "$ctr" --format "$fmt" 2>/dev/null |
    awk -v wanted="$wanted" '
      BEGIN { FS = "=" }
      $1 == wanted {
        sub(/^[^=]+=/, "")
        print
        exit
      }
    '
}
# Env keys whose values bake network topology into the container. If the spec's
# value for one of these keys ever differs from the running container's value
# (host IP changed, DHCP lease rotated, LAN re-subnetted, container dependency
# moved between archy-net and bridge), the container MUST be recreated.
# This is the systemic fix for the fedimint April-11 stale-IP class of bug
# where a container's URL env was never reconciled after network changes.
#
# Keys are matched by literal suffix to keep the list small:
#   _URL          FM_P2P_URL, FM_API_URL, FM_BITCOIND_URL, ...
#   _HOST         BTCPAY_HOST, CORE_RPC_HOST, ...
#   _ENDPOINT     keys spelled with an underscore before ENDPOINT
#   RPCURL        NBXPLORER_BTCRPCURL (no underscore before URL, so "_URL"
#                 alone does not match it)
#   NODEENDPOINT  NBXPLORER_BTCNODEENDPOINT (likewise missed by "_ENDPOINT")
URL_ENV_SUFFIXES="_URL _HOST _ENDPOINT RPCURL NODEENDPOINT"
# Report whether any local image's "repository:tag" contains the given string.
# Arguments: $1 - image reference (literal substring match, not a regex)
# Globals:   PODMAN (read) - container CLI to invoke
# Returns:   0 if some listed image contains $1, non-zero otherwise
image_exists() {
  # Note: `podman images | grep -q` lets grep close the pipe after the first
  # match → SIGPIPE (exit 141) on podman. With `set -o pipefail` active in the
  # parent script that propagates as failure and spuriously skips local-image
  # containers. So: take the full listing first, then match it from a
  # here-string with no pipeline involved.
  local images
  images=$($PODMAN images --format '{{.Repository}}:{{.Tag}}' 2>/dev/null)
  # --: a reference beginning with "-" is not parsed as a grep option.
  grep -qF -- "$1" <<<"$images"
}
# Convert memory string to bytes for comparison
@@ -287,6 +319,29 @@ reconcile() {
reasons+="memory(none→$SPEC_MEMORY) "
fi
# Check URL/HOST env drift — catches stale network topology baked into
# container env (fedimint April-11 bug: FM_P2P_URL pointed at old IP).
# Only keys ending in one of $URL_ENV_SUFFIXES are compared; other env drift
# (passwords rotated, etc.) is intentionally ignored to avoid thrashing.
# Runs only while no earlier check has already changed $action from "OK".
if [ "$action" = "OK" ] && [ -n "$SPEC_ENV" ]; then
  local kv env_key env_val_spec env_val_cur is_url_key suffix
  # SPEC_ENV is a space-separated list of KEY=VALUE words, hence the
  # deliberately unquoted expansion.
  for kv in $SPEC_ENV; do
    env_key="${kv%%=*}"
    env_val_spec="${kv#*=}"
    is_url_key=false
    for suffix in $URL_ENV_SUFFIXES; do
      case "$env_key" in *"$suffix") is_url_key=true; break ;; esac
    done
    [ "$is_url_key" = "true" ] || continue
    env_val_cur=$(container_env_val "$name" "$env_key")
    if [ "$env_val_cur" != "$env_val_spec" ]; then
      action="RECREATE"
      # current→spec, same arrow form as the other drift reasons, so the two
      # values do not run together in the log line.
      reasons+="env($env_key:$env_val_cur→$env_val_spec) "
      # One drifted key is enough to force a recreate; stop scanning.
      break
    fi
  done
fi
# Check if running
if ! container_running "$name" && [ "$action" = "OK" ]; then
action="START"