refactor: update dependencies and remove unused code
- Added new dependencies: `adler2`, `crc32fast`, `flate2`, `miniz_oxide`, and `libredox`. - Updated existing dependencies: `tokio-rustls` to version 0.26.4 and `filetime` to version 0.2.27. - Removed the `backup.rs` file as it is no longer needed. - Introduced tests for configuration and credential management. - Enhanced the `identity` module to generate W3C compliant DID documents. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
339
scripts/chaos-test.sh
Executable file
339
scripts/chaos-test.sh
Executable file
@@ -0,0 +1,339 @@
|
||||
#!/usr/bin/env bash
|
||||
# chaos-test.sh — Chaos/resilience test for Archipelago server.
|
||||
#
|
||||
# Tests the server's ability to survive adverse conditions:
|
||||
# - Process kills (verify systemd restart)
|
||||
# - Container stop/start cycling
|
||||
# - Concurrent RPC requests (verify no crashes)
|
||||
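# - High disk usage warnings (listed as a goal; no test in this script exercises it)
# - Network interruption recovery (listed as a goal; no test in this script exercises it)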
|
||||
#
|
||||
# Usage:
|
||||
# ssh archipelago@192.168.1.228 "cd ~/archy && bash scripts/chaos-test.sh"
|
||||
#
|
||||
# Duration: each test runs once. CHAOS_DURATION_HOURS (default 0.5) is only printed in the banner; it does not currently change how long the suite runs.
|
||||
|
||||
# Unset variables are errors and a pipeline fails if any stage fails.
# -e is not enabled, so a failing command does not abort the run; the tests
# below inspect results themselves.
set -uo pipefail

# Settings. Each one can be overridden through a CHAOS_* environment variable;
# the defaults match the previously hard-coded values.
CHAOS_DURATION_HOURS="${CHAOS_DURATION_HOURS:-0.5}"        # printed in the banner
RPC_URL="${CHAOS_RPC_URL:-http://localhost:5678/rpc/v1}"   # JSON RPC endpoint
HEALTH_URL="${CHAOS_HEALTH_URL:-http://localhost/health}"  # health probe endpoint
MAX_RECOVERY_WAIT="${CHAOS_MAX_RECOVERY_WAIT:-60}"         # default health wait, seconds

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Result bookkeeping: pass/fail counters and one "<RESULT> <name>" entry per test.
PASS=0
FAIL=0
TESTS=()
|
||||
|
||||
# Console helpers. Each prints a colored tag followed by its arguments joined
# by single spaces, on stdout. Only the color variables are escape-expanded
# (%b); the message itself is printed verbatim (%s), so backslashes that
# happen to appear in it are not interpreted.
log() { printf '%b[CHAOS]%b %s\n' "$GREEN" "$NC" "$*"; }
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*"; }
fail() { printf '%b[FAIL]%b %s\n' "$RED" "$NC" "$*"; }
|
||||
|
||||
# record NAME RESULT
#
# Appends "<RESULT> <NAME>" to TESTS and updates the counters:
#   PASS       increments PASS
#   SKIP       is listed in the summary but changes neither counter
#   any other  is treated as a failure and increments FAIL
record() {
  local name="$1" result="$2"
  case "$result" in
    PASS)
      PASS=$((PASS + 1))
      TESTS+=("PASS $name")
      ;;
    SKIP)
      TESTS+=("SKIP $name")
      ;;
    *)
      FAIL=$((FAIL + 1))
      TESTS+=("FAIL $name")
      ;;
  esac
}
|
||||
|
||||
# Authenticate
#
# Cookie jar written by authenticate() and read by later requests. The EXIT
# trap removes it on every exit path, including early aborts.
COOKIE_FILE=$(mktemp) || { printf 'mktemp failed\n' >&2; exit 1; }
trap 'rm -f -- "$COOKIE_FILE"' EXIT

# authenticate
#
# POSTs auth.login to $RPC_URL and stores the response cookies in $COOKIE_FILE.
# The password comes from $CHAOS_PASSWORD and defaults to the previously
# hard-coded value. The request body is sent on stdin so the password does not
# appear in curl's argument list.
# Returns: curl's exit status. Response body and errors are discarded.
authenticate() {
  local password="${CHAOS_PASSWORD:-password123}"
  # Escape backslashes and double quotes so the value stays a valid JSON string.
  password=${password//\\/\\\\}
  password=${password//\"/\\\"}
  printf '%s' "{\"method\":\"auth.login\",\"params\":{\"password\":\"${password}\"}}" \
    | curl -s -c "$COOKIE_FILE" -X POST "$RPC_URL" \
      -H "Content-Type: application/json" \
      --data-binary @- > /dev/null 2>&1
}
|
||||
|
||||
# rpc METHOD [PARAMS_JSON]
#
# POSTs {"method":METHOD,"params":PARAMS_JSON} to $RPC_URL using the cookies in
# $COOKIE_FILE and prints the response body on stdout. PARAMS_JSON is inserted
# verbatim and defaults to null.
#
# The X-CSRF-Token header is the last whitespace-separated field of the last
# cookie-jar line containing "csrf_token". Taking only the last match keeps the
# header on a single line if the jar holds more than one such entry. The header
# value is empty when the jar is missing or has no match.
rpc() {
  local method="$1"
  local params="${2:-null}"
  local csrf
  csrf=$(awk '/csrf_token/ { token = $NF } END { print token }' "$COOKIE_FILE" 2>/dev/null)
  curl -s -b "$COOKIE_FILE" -X POST "$RPC_URL" \
    -H "Content-Type: application/json" \
    -H "X-CSRF-Token: $csrf" \
    -d "{\"method\":\"$method\",\"params\":$params}" 2>/dev/null
}
|
||||
|
||||
# wait_for_health [TIMEOUT_SECONDS]
#
# Polls $HEALTH_URL until it answers successfully (curl -f) or TIMEOUT_SECONDS
# of wall-clock time have passed. TIMEOUT_SECONDS defaults to
# $MAX_RECOVERY_WAIT. Failed probes are followed by a 2 second pause.
#
# Each probe is bounded with --max-time (at most 5 seconds, never more than the
# time left), so a connection that hangs cannot stall the wait indefinitely.
# Returns: 0 once a probe succeeds, 1 on timeout.
wait_for_health() {
  local timeout="${1:-$MAX_RECOVERY_WAIT}"
  local deadline=$((SECONDS + timeout))
  local remaining probe_limit
  while [ "$SECONDS" -lt "$deadline" ]; do
    remaining=$((deadline - SECONDS))
    probe_limit=$((remaining < 5 ? remaining : 5))
    # --max-time 0 would mean "no limit" to curl, so never go below 1.
    [ "$probe_limit" -ge 1 ] || probe_limit=1
    if curl -sf --max-time "$probe_limit" "$HEALTH_URL" > /dev/null 2>&1; then
      return 0
    fi
    sleep 2
  done
  return 1
}
|
||||
|
||||
# Banner: blank line, title framed by rules, configured duration, blank line.
cat <<EOF

============================================
 Archipelago Chaos Test Suite
============================================
 Duration: ${CHAOS_DURATION_HOURS}h

EOF

# Pre-check: refuse to start unless the health endpoint answers successfully.
curl -sf "$HEALTH_URL" > /dev/null 2>&1 || {
  fail "Server not healthy at $HEALTH_URL — aborting"
  exit 1
}
log "Server is healthy"
authenticate
|
||||
|
||||
# =============================================================================
# Test 1: Process Kill Recovery
# =============================================================================
# SIGKILLs the service, then waits up to 30 seconds for the health endpoint to
# answer again.
log "=== Test 1: Process Kill Recovery ==="
log "Killing archipelago process..."

# Prefer killing through systemd. If that fails, fall back to SIGKILLing the
# first PID pgrep reports for the binary path. The PID is checked and quoted so
# kill is never invoked without a target.
if ! sudo systemctl kill --signal=SIGKILL archipelago 2>/dev/null; then
  target_pid=$(pgrep -f "/usr/local/bin/archipelago" | head -n 1)
  if [ -n "$target_pid" ]; then
    sudo kill -9 "$target_pid" 2>/dev/null
  else
    warn "No archipelago process found to kill"
  fi
fi

sleep 2

if wait_for_health 30; then
  log "Backend recovered after SIGKILL in <30s"
  record "Process kill recovery" "PASS"
else
  fail "Backend did not recover after SIGKILL within 30s"
  record "Process kill recovery" "FAIL"
  # Try to restart manually so the remaining tests have a server to talk to
  sudo systemctl start archipelago
  sleep 5
fi

# Log in again after the service went down.
authenticate
|
||||
|
||||
# =============================================================================
# Test 2: Graceful Restart
# =============================================================================
# Restarts the unit through systemd and expects the health endpoint to answer
# within 20 seconds.
log "=== Test 2: Graceful Restart ==="
log "Restarting archipelago service..."

sudo systemctl restart archipelago
sleep 2

if ! wait_for_health 20; then
  fail "Backend did not come up after restart"
  record "Graceful restart" "FAIL"
else
  log "Backend restarted gracefully"
  record "Graceful restart" "PASS"
fi

# Log in again after the restart.
authenticate
|
||||
|
||||
# =============================================================================
# Test 3: Concurrent RPC Requests
# =============================================================================
log "=== Test 3: Concurrent RPC Load (100 requests) ==="

# Fire 100 system.stats requests in parallel. Each background job prints one
# OK/FAIL line; the lines are collected through the command substitution, so no
# shared files under /tmp are needed and stale results from an earlier run
# cannot leak in.
# NOTE(review): these requests carry no session cookie or CSRF token — confirm
# system.stats is expected to answer unauthenticated.
CONCURRENT_RESULTS=$(
  for ((i = 1; i <= 100; i++)); do
    (
      result=$(curl -sf -X POST "$RPC_URL" \
        -H "Content-Type: application/json" \
        -d '{"method":"system.stats"}' 2>/dev/null)
      if echo "$result" | grep -q "cpu_usage_percent"; then
        echo "OK"
      else
        echo "FAIL"
      fi
    ) &
  done
  wait
)

# grep -c always prints a count, including 0 when nothing matched.
CONCURRENT_PASS=$(grep -c '^OK$' <<<"$CONCURRENT_RESULTS")
CONCURRENT_FAIL=$(grep -c '^FAIL$' <<<"$CONCURRENT_RESULTS")
log "Concurrent requests: $CONCURRENT_PASS ok, $CONCURRENT_FAIL failed"

# Re-authenticate in case cookies expired during load
authenticate

# The verdict is whether the server is still healthy afterwards; the
# per-request counts above are informational.
if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
  log "Server survived 100 concurrent requests"
  record "Concurrent RPC load" "PASS"
else
  fail "Server crashed under concurrent load"
  record "Concurrent RPC load" "FAIL"
  sudo systemctl restart archipelago
  sleep 5
  authenticate
fi
|
||||
|
||||
# =============================================================================
# Test 4: Container Stop/Start Cycling
# =============================================================================
log "=== Test 4: Container Stop/Start Cycling ==="

# Use filebrowser as test container (lightweight, quick to restart).
# CHAOS_CONTAINER_ID overrides the choice; setting it to an empty string skips
# the test.
CONTAINER_ID="${CHAOS_CONTAINER_ID-filebrowser}"
if [ -n "$CONTAINER_ID" ]; then
  log "Testing with container: $CONTAINER_ID"

  # Stop
  rpc "package.stop" "{\"id\":\"$CONTAINER_ID\"}" > /dev/null
  sleep 3

  # Verify stopped. This only warns; the verdict comes from the restart check.
  # NOTE(review): uses the same "running" substring heuristic as the check
  # below — confirm against the container-status response format.
  status=$(rpc "container-status" "{\"id\":\"$CONTAINER_ID\"}")
  if echo "$status" | grep -qi "running"; then
    warn "Container $CONTAINER_ID still reports running after package.stop"
  fi

  # Start
  rpc "package.start" "{\"id\":\"$CONTAINER_ID\"}" > /dev/null
  sleep 10

  # Verify running (check both container-status and podman directly)
  status=$(rpc "container-status" "{\"id\":\"$CONTAINER_ID\"}")
  # grep -c prints the count itself (0 when nothing matches), so no fallback
  # echo is appended; an extra "0" would make the value non-numeric.
  podman_running=$(podman ps --filter "name=^${CONTAINER_ID}$" --format "{{.Status}}" 2>/dev/null \
    | head -n 1 | grep -ci "up")
  podman_running=${podman_running:-0}
  if echo "$status" | grep -qi "running" || [ "$podman_running" -gt 0 ]; then
    log "Container $CONTAINER_ID stop/start cycle OK"
    record "Container cycling" "PASS"
  else
    warn "Container $CONTAINER_ID may not have restarted"
    record "Container cycling" "FAIL"
  fi
else
  warn "No running containers found, skipping container test"
  TESTS+=("SKIP Container cycling (no containers)")
fi
|
||||
|
||||
# =============================================================================
# Test 5: RPC Error Handling
# =============================================================================
log "=== Test 5: RPC Error Handling ==="

# POSTs a deliberately malformed JSON body to $RPC_URL and prints the HTTP
# status code. curl's -w already prints 000 when no HTTP response was received,
# so no fallback echo is appended; an extra "000" would make the value differ
# from "000" and hide an unreachable server. 000 is also printed if curl
# produced no output at all.
_malformed_json_status() {
  local code
  code=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$RPC_URL" \
    -H "Content-Type: application/json" -d '{broken}' 2>/dev/null)
  printf '%s' "${code:-000}"
}

# Invalid method
result=$(rpc "nonexistent.method")
if echo "$result" | grep -qi "error\|unknown"; then
  log "Invalid method correctly returns error"
  err_pass=true
else
  fail "Invalid method did not return error"
  err_pass=false
fi

# Malformed JSON — server should not crash (any response is acceptable)
http_code=$(_malformed_json_status)
if [ "$http_code" != "000" ]; then
  log "Malformed JSON handled without crash (HTTP $http_code)"
else
  # Server may have been restarting from previous test, wait and retry
  sleep 3
  http_code=$(_malformed_json_status)
  if [ "$http_code" != "000" ]; then
    log "Malformed JSON handled without crash (HTTP $http_code, retry)"
  else
    warn "Server unreachable for malformed JSON test"
    err_pass=false
  fi
fi

# Missing params
result=$(rpc "backup.create")
if echo "$result" | grep -qi "error\|missing"; then
  log "Missing params correctly returns error"
else
  fail "Missing params did not return error"
  err_pass=false
fi

if [ "$err_pass" = true ]; then
  record "RPC error handling" "PASS"
else
  record "RPC error handling" "FAIL"
fi
|
||||
|
||||
# =============================================================================
# Test 6: Rapid Restart Cycling
# =============================================================================
log "=== Test 6: Rapid Restart Cycling ==="

# Restart three times in a row; every cycle must become healthy within 15s.
# The outcome is tracked in a flag so that exactly one result is recorded.
rapid_ok=true
for i in 1 2 3; do
  sudo systemctl restart archipelago
  sleep 3
  if ! wait_for_health 15; then
    fail "Failed to recover on cycle $i"
    rapid_ok=false
    break
  fi
done

# Final probe: the server must still be healthy after the last cycle.
if [ "$rapid_ok" = true ] && ! curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
  fail "Server unhealthy after rapid restarts"
  rapid_ok=false
fi

if [ "$rapid_ok" = true ]; then
  log "Server survived 3 rapid restarts"
  record "Rapid restart cycling" "PASS"
else
  record "Rapid restart cycling" "FAIL"
fi

# Log in again after the restarts.
authenticate
|
||||
|
||||
# =============================================================================
# Test 7: Data Integrity After Chaos
# =============================================================================
# Three read-only RPC calls must still return their expected marker strings.
# Every failing check is reported so a FAIL verdict names its cause.
log "=== Test 7: Data Integrity Check ==="

# Check system stats still work
stats=$(rpc "system.stats")
if echo "$stats" | grep -q "cpu_usage_percent"; then
  log "System stats OK"
  data_ok=true
else
  fail "System stats broken"
  data_ok=false
fi

# Check update status
update=$(rpc "update.status")
if echo "$update" | grep -q "current_version"; then
  log "Update status OK"
else
  fail "Update status broken"
  data_ok=false
fi

# Check backup list
backups=$(rpc "backup.list")
if echo "$backups" | grep -q "backups"; then
  log "Backup list OK"
else
  fail "Backup list broken"
  data_ok=false
fi

if [ "$data_ok" = true ]; then
  record "Data integrity" "PASS"
else
  record "Data integrity" "FAIL"
fi
|
||||
|
||||
# =============================================================================
# Summary
# =============================================================================
# Remove the cookie jar, print one colored line per recorded result followed by
# the totals, and exit non-zero if any test failed.
rm -f "$COOKIE_FILE"

summary_rule="============================================"
printf '\n%s\n%s\n%s\n' "$summary_rule" " Chaos Test Results" "$summary_rule"

for entry in "${TESTS[@]}"; do
  # Pick the color from the result prefix; entries with any other prefix are
  # not printed.
  case "$entry" in
    PASS*) entry_color="$GREEN" ;;
    FAIL*) entry_color="$RED" ;;
    SKIP*) entry_color="$YELLOW" ;;
    *) continue ;;
  esac
  echo -e " ${entry_color}${entry}${NC}"
done

printf '\n%s\n%s\n' " Passed: $PASS Failed: $FAIL" "$summary_rule"

if (( FAIL > 0 )); then
  exit 1
fi
|
||||
Reference in New Issue
Block a user