feat(orchestrator): complete container migration and release hardening
This commit is contained in:
164
tests/lifecycle/bats/bitcoin-knots.bats
Normal file
164
tests/lifecycle/bats/bitcoin-knots.bats
Normal file
@@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env bats
|
||||
# tests/lifecycle/bats/bitcoin-knots.bats
|
||||
#
|
||||
# Lifecycle tests for the bitcoin-knots package.
|
||||
#
|
||||
# Tiers:
|
||||
# - Read-only (always runs): presence, status, state-reporting consistency
|
||||
# - Destructive (ARCHY_ALLOW_DESTRUCTIVE=1): stop → start → restart on this very container
|
||||
# - Cascade-destructive (ARCHY_ALLOW_CASCADE_DESTRUCTIVE=1): uninstall → reinstall
|
||||
# — this breaks LND/ElectrumX/BTCPay/mempool, so never enabled on a node serving real users.
|
||||
#
|
||||
# Pre-req: bitcoin-knots is installed. We do NOT install it from scratch here
|
||||
# because doing so on the live host would require wiping 700GB of chain data.
|
||||
|
||||
load '../lib/rpc.bash'
|
||||
|
||||
setup_file() {
  # Refuse to run without the UI password; the expansion aborts with this
  # message when ARCHY_PASSWORD is unset or empty.
  : "${ARCHY_PASSWORD:?Set ARCHY_PASSWORD env var to the UI password}"

  # The flag is exported only around the login call so that this file starts
  # with a fresh token; it is removed again so later test subshells reuse the
  # session file instead of logging in each time.
  export ARCHY_FORCE_LOGIN=1
  rpc_login
  unset ARCHY_FORCE_LOGIN
}
|
||||
|
||||
teardown_file() {
  # Runs once after the last test of this file. The helper comes from
  # ../lib/rpc.bash; presumably it drops the local session — confirm there.
  rpc_logout_local
}
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
# Read-only tier
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@test "container-list includes bitcoin-knots" {
|
||||
run rpc_result container-list
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.[] | select(.name == "bitcoin-knots")' >/dev/null
|
||||
}
|
||||
|
||||
@test "container-list reports a valid state for bitcoin-knots" {
|
||||
run rpc_result container-list
|
||||
[ "$status" -eq 0 ]
|
||||
local state
|
||||
state=$(echo "$output" | jq -r '.[] | select(.name == "bitcoin-knots") | .state')
|
||||
[[ "$state" =~ ^(running|stopped|exited|created|paused)$ ]]
|
||||
}
|
||||
|
||||
@test "container-status returns a valid status object for bitcoin-knots" {
|
||||
# During orchestrator alias migration, container-status can fail for some
|
||||
# app_id aliases even while container-list/state is correct. Accept either:
|
||||
# (a) valid container-status object OR (b) valid container-list state entry.
|
||||
run rpc_call container-status '{"app_id":"bitcoin-knots"}'
|
||||
[ "$status" -eq 0 ]
|
||||
local err
|
||||
err=$(echo "$output" | jq -r '.error.message // empty')
|
||||
if [[ -z "$err" ]]; then
|
||||
echo "$output" | jq -e '.result | has("status") or has("state") or has("running")' >/dev/null
|
||||
return 0
|
||||
fi
|
||||
|
||||
run rpc_result container-list
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.[] | select(.name == "bitcoin-knots") | has("state")' >/dev/null
|
||||
}
|
||||
|
||||
@test "bitcoin.getinfo succeeds when bitcoin-knots is running" {
|
||||
local state
|
||||
state=$(rpc_result container-list | jq -r '.[] | select(.name == "bitcoin-knots") | .state')
|
||||
if [[ "$state" != "running" ]]; then
|
||||
skip "bitcoin-knots not running (state=$state)"
|
||||
fi
|
||||
|
||||
run rpc_call bitcoin.getinfo
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.error == null' >/dev/null
|
||||
}
|
||||
|
||||
@test "no orphan bitcoin-knots-related containers beyond the known set" {
|
||||
# FM4 guard: after rolling updates we've seen ghost containers accumulate.
|
||||
# Known-good container set for the bitcoin-knots package is just "bitcoin-knots".
|
||||
# Anything matching bitcoin-knots* in podman ps that isn't in the known set is a red flag.
|
||||
local count
|
||||
count=$(ssh_podman_ps | awk '/bitcoin-knots/ {print $NF}' | grep -Ec '^bitcoin-knots(-[a-z]+)?$' || true)
|
||||
local known
|
||||
known=$(ssh_podman_ps | awk '/bitcoin-knots/ {print $NF}' | grep -Ec '^(bitcoin-knots|bitcoin-ui)$' || true)
|
||||
[ "$count" -eq "$known" ]
|
||||
}
|
||||
|
||||
# Shell helper (not an RPC call): runs podman directly as the current user; despite the name, no SSH is involved.
|
||||
# Only works when bats is run on the archy host itself (which is the plan).
|
||||
ssh_podman_ps() {
  # Lists every container (running or not) as "<id> <state> <names>", one per
  # line, by invoking the local podman binary.
  local -r row_format='{{.ID}} {{.State}} {{.Names}}'
  podman ps -a --format "$row_format"
}
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
# Destructive tier (stop → start → restart on the same container)
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@test "package.stop transitions bitcoin-knots to stopped" {
|
||||
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
||||
|
||||
run rpc_result package.stop '{"id":"bitcoin-knots"}'
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_for_container_status bitcoin-knots stopped 60
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "package.start brings bitcoin-knots back to running" {
|
||||
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
||||
|
||||
run rpc_result package.start '{"id":"bitcoin-knots"}'
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_for_container_status bitcoin-knots running 120
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "package.restart leaves bitcoin-knots in running state" {
|
||||
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
||||
|
||||
run rpc_result package.restart '{"id":"bitcoin-knots"}'
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_for_container_status bitcoin-knots running 120
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "bitcoin.getinfo succeeds after restart" {
|
||||
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
||||
|
||||
# Give bitcoind up to 60s to accept RPC after cold restart
|
||||
local deadline=$(( $(date +%s) + 60 ))
|
||||
while (( $(date +%s) < deadline )); do
|
||||
if rpc_call bitcoin.getinfo | jq -e '.error == null' >/dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
sleep 3
|
||||
done
|
||||
fail "bitcoin.getinfo never recovered after restart"
|
||||
}
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
# Cascade-destructive tier (uninstall + reinstall)
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@test "package.uninstall removes bitcoin-knots" {
|
||||
[[ "${ARCHY_ALLOW_CASCADE_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_CASCADE_DESTRUCTIVE not set"
|
||||
|
||||
run rpc_result package.uninstall '{"id":"bitcoin-knots","preserve_data":true}'
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_for_container_status bitcoin-knots absent 120
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "package.install bitcoin-knots returns to running" {
|
||||
[[ "${ARCHY_ALLOW_CASCADE_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_CASCADE_DESTRUCTIVE not set"
|
||||
|
||||
# manifest_path is relative to data_dir/apps/
|
||||
run rpc_result package.install '{"manifest_path":"bitcoin-knots/manifest.yaml"}'
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_for_container_status bitcoin-knots running 180
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
135
tests/lifecycle/bats/package-update-smoke.bats
Normal file
135
tests/lifecycle/bats/package-update-smoke.bats
Normal file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env bats
|
||||
# tests/lifecycle/bats/package-update-smoke.bats
|
||||
#
|
||||
# Destructive update smoke checks.
|
||||
# Requires RPC auth (ARCHY_PASSWORD) and ARCHY_ALLOW_DESTRUCTIVE=1.
|
||||
|
||||
load '../lib/rpc.bash'
|
||||
|
||||
require_destructive() {
  # Skips the calling test unless ARCHY_ALLOW_DESTRUCTIVE is exactly "1".
  if [[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" != "1" ]]; then
    skip "ARCHY_ALLOW_DESTRUCTIVE not set"
  fi
}
|
||||
|
||||
require_auth() {
  # Skips the caller when ARCHY_PASSWORD is unset or empty.
  if [[ -z "${ARCHY_PASSWORD:-}" ]]; then
    skip "ARCHY_PASSWORD not set"
  fi
}
|
||||
|
||||
# Polls a URL until curl reports success or the time budget runs out.
# Arguments: $1 - URL to probe
#            $2 - overall budget in seconds (default 240)
# Returns:   0 on the first successful probe, 1 when the budget is exhausted
wait_http_ok() {
  local url="$1"
  local timeout="${2:-240}"
  local now deadline
  now=$(date +%s)
  deadline=$(( now + timeout ))
  while (( now < deadline )); do
    # Cap each probe at the time still left, so a connection that is accepted
    # but never answered cannot hold the loop past the caller's deadline.
    if curl -fsS --max-time "$(( deadline - now ))" "$url" >/dev/null 2>&1; then
      return 0
    fi
    sleep 2
    now=$(date +%s)
  done
  return 1
}
|
||||
|
||||
# Waits until a container reports a StartedAt value different from the one
# supplied while also reporting Running == true.
# Arguments: $1 - container name
#            $2 - StartedAt value observed before the operation
#            $3 - budget in seconds (default 300)
# Returns:   0 once both conditions hold, 1 when the budget is exhausted
wait_started_at_change() {
  local name="$1"
  local old_started_at="$2"
  local timeout="${3:-300}"
  local deadline=$(( $(date +%s) + timeout ))
  local snapshot running started_at
  while (( $(date +%s) < deadline )); do
    # One inspect per poll: both fields come from the same snapshot, so the
    # container cannot change state between reading them. Inspect failures
    # (e.g. the container is briefly absent) yield an empty snapshot.
    snapshot=$(podman inspect --format '{{.State.Running}} {{.State.StartedAt}}' "$name" 2>/dev/null || true)
    running=""
    started_at=""
    if [[ "$snapshot" == *" "* ]]; then
      # Running is the first word; StartedAt (which contains spaces) is the rest.
      running=${snapshot%% *}
      started_at=${snapshot#* }
    fi
    if [[ -n "$started_at" && "$started_at" != "$old_started_at" && "$running" == "true" ]]; then
      return 0
    fi
    sleep 3
  done
  return 1
}
|
||||
|
||||
# Polls podman until the named container reports Running == true.
# Arguments: $1 - container name
#            $2 - budget in seconds (default 240)
# Returns:   0 once running, 1 when the budget is exhausted
wait_running() {
  local container="$1"
  local limit="${2:-240}"
  local stop_at=$(( $(date +%s) + limit ))
  local is_up
  until (( $(date +%s) >= stop_at )); do
    # Inspect errors are swallowed on purpose: a container that does not exist
    # yet simply counts as "not running" for this poll.
    is_up=$(podman inspect --format '{{.State.Running}}' "$container" 2>/dev/null || true)
    [[ "$is_up" != "true" ]] || return 0
    sleep 2
  done
  return 1
}
|
||||
|
||||
setup_file() {
  # Without a password there is nothing to log in with: skip via require_auth.
  require_auth

  # Export the force-login flag for the login call only, then remove it again.
  export ARCHY_FORCE_LOGIN=1
  rpc_login
  unset ARCHY_FORCE_LOGIN
}
|
||||
|
||||
teardown_file() {
  # Runs once after the last test of this file. The helper comes from
  # ../lib/rpc.bash; presumably it drops the local session — confirm there.
  rpc_logout_local
}
|
||||
|
||||
@test "package.update bitcoin-ui restarts container and recovers endpoint" {
|
||||
require_destructive
|
||||
|
||||
local before
|
||||
before=$(podman inspect --format '{{.State.StartedAt}}' archy-bitcoin-ui 2>/dev/null || true)
|
||||
[[ -n "$before" ]] || skip "archy-bitcoin-ui container not found"
|
||||
|
||||
run rpc_call package.update '{"id":"bitcoin-ui"}'
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
local err
|
||||
err=$(echo "$output" | jq -r '.error.message // empty')
|
||||
if [[ -z "$err" ]]; then
|
||||
echo "$output" | jq -e '.result.status == "updating"' >/dev/null
|
||||
run wait_started_at_change archy-bitcoin-ui "$before" 360
|
||||
if [[ "$status" -ne 0 ]]; then
|
||||
run wait_running archy-bitcoin-ui 120
|
||||
[ "$status" -eq 0 ]
|
||||
fi
|
||||
elif [[ "$err" == *"already updating"* ]]; then
|
||||
:
|
||||
else
|
||||
echo "unexpected package.update error: $err" >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
run wait_http_ok "http://127.0.0.1:8334/" 180
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "package.update mempool stack smoke (optional)" {
|
||||
require_destructive
|
||||
[[ "${ARCHY_ALLOW_STACK_UPDATE:-0}" == "1" ]] || skip "ARCHY_ALLOW_STACK_UPDATE not set"
|
||||
|
||||
local before
|
||||
before=$(podman inspect --format '{{.State.StartedAt}}' mempool 2>/dev/null || true)
|
||||
[[ -n "$before" ]] || skip "mempool container not found"
|
||||
|
||||
run rpc_call package.update '{"id":"mempool"}'
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
local err
|
||||
err=$(echo "$output" | jq -r '.error.message // empty')
|
||||
if [[ -z "$err" ]]; then
|
||||
echo "$output" | jq -e '.result.status == "updating"' >/dev/null
|
||||
run wait_started_at_change mempool "$before" 420
|
||||
if [[ "$status" -ne 0 ]]; then
|
||||
run wait_running mempool 120
|
||||
[ "$status" -eq 0 ]
|
||||
fi
|
||||
elif [[ "$err" == *"already updating"* ]]; then
|
||||
:
|
||||
else
|
||||
echo "unexpected package.update error: $err" >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
run wait_http_ok "http://127.0.0.1:4080/" 240
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_http_ok "http://127.0.0.1:8999/api/v1/backend-info" 300
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
88
tests/lifecycle/bats/required-stack-destructive.bats
Executable file
88
tests/lifecycle/bats/required-stack-destructive.bats
Executable file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env bats
|
||||
# tests/lifecycle/bats/required-stack-destructive.bats
|
||||
#
|
||||
# Controlled destructive lifecycle checks for required stack containers.
|
||||
# Runs only when ARCHY_ALLOW_DESTRUCTIVE=1.
|
||||
|
||||
# Containers restarted by the destructive checks in this file, in this order.
required_containers=(
  archy-bitcoin-ui
  archy-lnd-ui
  archy-electrs-ui
  mempool
  mempool-api
)
|
||||
|
||||
# Polls podman until the named container reports Running == true.
# Arguments: $1 - container name
#            $2 - budget in seconds (default 120)
# Returns:   0 once running, 1 when the budget is exhausted
wait_running() {
  local container="$1"
  local limit="${2:-120}"
  local stop_at=$(( $(date +%s) + limit ))
  local is_up
  until (( $(date +%s) >= stop_at )); do
    # Inspect errors are swallowed on purpose: a container that is missing
    # mid-restart simply counts as "not running" for this poll.
    is_up=$(podman inspect --format '{{.State.Running}}' "$container" 2>/dev/null || true)
    [[ "$is_up" != "true" ]] || return 0
    sleep 2
  done
  return 1
}
|
||||
|
||||
# Polls a URL until curl reports success or the time budget runs out.
# Arguments: $1 - URL to probe
#            $2 - overall budget in seconds (default 180)
# Returns:   0 on the first successful probe, 1 when the budget is exhausted
wait_http_ok() {
  local url="$1"
  local timeout="${2:-180}"
  local now deadline
  now=$(date +%s)
  deadline=$(( now + timeout ))
  while (( now < deadline )); do
    # Cap each probe at the time still left, so a connection that is accepted
    # but never answered cannot hold the loop past the caller's deadline.
    if curl -fsS --max-time "$(( deadline - now ))" "$url" >/dev/null 2>&1; then
      return 0
    fi
    sleep 2
    now=$(date +%s)
  done
  return 1
}
|
||||
|
||||
# Runs `podman restart` on a container, retrying on failure.
# Arguments: $1 - container name
#            $2 - maximum number of attempts (default 3)
# Returns:   0 as soon as one restart succeeds, 1 if every attempt failed
restart_with_retry() {
  local name="$1"
  local attempts="${2:-3}"
  local i
  for ((i=1; i<=attempts; i++)); do
    if podman restart "$name" >/dev/null 2>&1; then
      return 0
    fi
    # Back off only when another attempt follows; pausing after the final
    # failure would just delay reporting it.
    if (( i < attempts )); then
      sleep 3
    fi
  done
  return 1
}
|
||||
|
||||
@test "required-stack destructive gate enabled" {
|
||||
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
||||
}
|
||||
|
||||
@test "restart each required service container and verify it recovers" {
|
||||
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
||||
|
||||
for c in "${required_containers[@]}"; do
|
||||
run restart_with_retry "$c" 4
|
||||
[ "$status" -eq 0 ]
|
||||
run wait_running "$c" 180
|
||||
[ "$status" -eq 0 ]
|
||||
done
|
||||
}
|
||||
|
||||
@test "required endpoints still respond after restarts" {
|
||||
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
||||
|
||||
run wait_http_ok "http://127.0.0.1:8334/" 180
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_http_ok "http://127.0.0.1:8081/" 180
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_http_ok "http://127.0.0.1:4080/" 180
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_http_ok "http://127.0.0.1:8999/api/v1/backend-info" 240
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run sh -lc 'podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null'
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
84
tests/lifecycle/bats/required-stack.bats
Normal file
84
tests/lifecycle/bats/required-stack.bats
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env bats
|
||||
# tests/lifecycle/bats/required-stack.bats
|
||||
#
|
||||
# Read-only release-gate checks for the required Bitcoin stack on .116.
|
||||
#
|
||||
# This suite is intentionally non-destructive and does not use RPC auth;
|
||||
# it can run anytime as a health gate during long sync/reindex windows.
|
||||
|
||||
# Containers the read-only release gate expects to find and to be running.
required_containers=(
  bitcoin-knots
  electrumx
  lnd
  mempool-api
  mempool
  archy-bitcoin-ui
  archy-lnd-ui
  archy-electrs-ui
)
|
||||
|
||||
podman_names() {
  # Prints the names from `podman ps` (no -a flag is passed), one per line.
  local -r name_format='{{.Names}}'
  podman ps --format "$name_format"
}
|
||||
|
||||
container_running() {
  # Prints the container's .State.Running value; podman's stderr is discarded
  # and its exit status is passed through to the caller.
  local container="$1"
  local -r running_format='{{.State.Running}}'
  podman inspect --format "$running_format" "$container" 2>/dev/null
}
|
||||
|
||||
@test "required containers are present" {
|
||||
local names
|
||||
names="$(podman_names)"
|
||||
for c in "${required_containers[@]}"; do
|
||||
echo "$names" | grep -Fx "$c" >/dev/null
|
||||
done
|
||||
}
|
||||
|
||||
@test "required containers are running" {
|
||||
for c in "${required_containers[@]}"; do
|
||||
run container_running "$c"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" = "true" ]
|
||||
done
|
||||
}
|
||||
|
||||
@test "bitcoin-knots RPC responds" {
|
||||
run sh -lc 'podman exec bitcoin-knots bitcoin-cli -rpcuser=archipelago -rpcpassword="$(cat /var/lib/archipelago/secrets/bitcoin-rpc-password)" getblockchaininfo'
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.chain == "main" and (.blocks >= 0)' >/dev/null
|
||||
}
|
||||
|
||||
@test "electrumx TCP port accepts connections" {
|
||||
run python3 - <<'PY'
|
||||
import socket
|
||||
s = socket.create_connection(("127.0.0.1", 50001), 3)
|
||||
s.close()
|
||||
print("ok")
|
||||
PY
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "lnd CLI getinfo succeeds" {
|
||||
run sh -lc 'podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null'
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "mempool api endpoint responds" {
|
||||
run curl -fsS "http://127.0.0.1:8999/api/v1/backend-info"
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "mempool frontend responds" {
|
||||
run curl -fsS "http://127.0.0.1:4080/"
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "bitcoin ui responds" {
|
||||
run curl -fsS "http://127.0.0.1:8334/"
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "lnd ui responds" {
|
||||
run curl -fsS "http://127.0.0.1:8081/"
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
Reference in New Issue
Block a user