feat: Phase 4 backend hardening — container reliability + security audit

Container Management (CONT-01 through CONT-06):
- Fix needs_archy_net: add lnd, nbxplorer to archy-net list
- Add StartupTier dependency ordering to health monitor (DB→Core→Dependent→App→UI)
- Add exponential backoff (10s/30s/90s) with 1hr stability reset
- Add get_health_check_args() with health checks for 20+ apps
- Add get_memory_limit() with per-app limits (128m-4g vs blanket 2g)
- Create docs/network-topology.md
- Fix fedimint containers on both nodes (moved to archy-net)

Security Audit (SEC-01 through SEC-06):
- Add sanitize_error_message() — strips internal paths from RPC errors
- Add validate_identity_id() — blocks path traversal on identity operations
- Add validate_did() — blocks path traversal on federation operations
- Add message size limits: node-send-message (1MB), dwn.write-message (10MB)
- Add rate limits for federation endpoints (join: 5/60s, invite: 10/300s)
- Configure journald (500MB max, 7 day retention) on both nodes
- Add /etc/logrotate.d/archipelago for backend + crowdsec logs
- Verify all 4 nginx security headers on both nodes

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dorian
2026-03-14 02:45:28 +00:00
parent f9a47a2602
commit 6335ea17ee
10 changed files with 593 additions and 83 deletions

View File

@@ -151,6 +151,14 @@ impl RpcHandler {
let data_format = params["dataFormat"].as_str();
let data = params.get("data").cloned();
// Limit data size to 10MB to prevent disk exhaustion
if let Some(ref d) = data {
let data_str = d.to_string();
if data_str.len() > 10_485_760 {
anyhow::bail!("Message data too large (max 10MB)");
}
}
let store = DwnStore::new(&self.config.data_dir).await?;
let message = store
.write_message(author, protocol, schema, data_format, data)

View File

@@ -4,6 +4,20 @@ use crate::identity;
use anyhow::Result;
use tracing::info;
/// Validate a DID parameter: must start with "did:", max 256 chars, no path traversal.
fn validate_did(did: &str) -> Result<()> {
if did.is_empty() || did.len() > 256 {
anyhow::bail!("Invalid DID: must be 1-256 characters");
}
if !did.starts_with("did:") {
anyhow::bail!("Invalid DID: must start with 'did:'");
}
if did.contains("..") || did.contains('/') || did.contains('\\') || did.contains('\0') {
anyhow::bail!("Invalid DID: contains forbidden characters");
}
Ok(())
}
impl RpcHandler {
/// federation.invite — Generate an invite code containing our DID + onion for a peer.
pub(super) async fn handle_federation_invite(&self) -> Result<serde_json::Value> {
@@ -107,6 +121,7 @@ impl RpcHandler {
.get("did")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'did' parameter"))?;
validate_did(did)?;
let nodes = federation::remove_node(&self.config.data_dir, did).await?;
info!(did = %did, "Removed node from federation");
@@ -127,6 +142,7 @@ impl RpcHandler {
.get("did")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'did' parameter"))?;
validate_did(did)?;
let trust_str = params
.get("trust_level")
.and_then(|v| v.as_str())

View File

@@ -5,6 +5,20 @@ use crate::identity_manager::{IdentityManager, IdentityPurpose};
use anyhow::{Context, Result};
use nostr_sdk::ToBech32;
/// Validate an identity ID: alphanumeric, hyphens, underscores, 1-128 chars, no path traversal.
fn validate_identity_id(id: &str) -> Result<()> {
if id.is_empty() || id.len() > 128 {
anyhow::bail!("Invalid identity id: must be 1-128 characters");
}
if id.contains("..") || id.contains('/') || id.contains('\\') || id.contains('\0') {
anyhow::bail!("Invalid identity id: contains forbidden characters");
}
if !id.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_' || b == b':') {
anyhow::bail!("Invalid identity id: must be alphanumeric, hyphens, underscores, or colons");
}
Ok(())
}
impl RpcHandler {
/// List all identities with their default status.
pub(super) async fn handle_identity_list(
@@ -83,6 +97,7 @@ impl RpcHandler {
.get("id")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing required parameter: id"))?;
validate_identity_id(id)?;
let manager = IdentityManager::new(&self.config.data_dir).await?;
let record = manager.get(id).await?;
@@ -112,6 +127,7 @@ impl RpcHandler {
.get("id")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing required parameter: id"))?;
validate_identity_id(id)?;
let manager = IdentityManager::new(&self.config.data_dir).await?;
manager.delete(id).await?;
@@ -129,6 +145,7 @@ impl RpcHandler {
.get("id")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing required parameter: id"))?;
validate_identity_id(id)?;
let manager = IdentityManager::new(&self.config.data_dir).await?;
manager.set_default(id).await?;

View File

@@ -66,6 +66,42 @@ struct RpcError {
/// Default dev password when no user is set up (matches mock-backend).
pub(crate) const DEV_DEFAULT_PASSWORD: &str = "password123";
/// Sanitize error messages before returning to clients.
/// Keeps user-facing validation errors but strips internal system details.
fn sanitize_error_message(msg: &str) -> String {
// Allow known validation errors through (these are user-actionable)
let user_facing_prefixes = [
"Invalid",
"Missing",
"Not found",
"Already exists",
"Rate limit",
"Unauthorized",
"Forbidden",
"Not supported",
"requires",
"must be",
"cannot",
"Password",
"Session",
];
for prefix in &user_facing_prefixes {
if msg.starts_with(prefix) || msg.contains(prefix) {
// Truncate long messages and strip file paths
let sanitized = msg.replace("/var/lib/archipelago/", "[data]/")
.replace("/usr/local/bin/", "[bin]/")
.replace("/etc/", "[config]/");
return if sanitized.len() > 200 {
format!("{}...", &sanitized[..200])
} else {
sanitized
};
}
}
// For all other errors, return a generic message
"Operation failed. Check server logs for details.".to_string()
}
/// Methods that do not require a valid session cookie.
const UNAUTHENTICATED_METHODS: &[&str] = &[
"auth.login",
@@ -472,6 +508,9 @@ impl RpcHandler {
"mesh.broadcast" => self.handle_mesh_broadcast().await,
"mesh.configure" => self.handle_mesh_configure(params).await,
// Server settings
"server.set-name" => self.handle_server_set_name(params).await,
// System monitoring
"system.stats" => self.handle_system_stats().await,
"system.processes" => self.handle_system_processes().await,
@@ -558,12 +597,14 @@ impl RpcHandler {
error: None,
},
Err(e) => {
error!("RPC error: {}", e);
error!("RPC error on {}: {}", rpc_req.method, e);
// Sanitize error messages: only return user-facing text, not internal details
let user_message = sanitize_error_message(&e.to_string());
RpcResponse {
result: None,
error: Some(RpcError {
code: -1,
message: e.to_string(),
message: user_message,
data: None,
}),
}

View File

@@ -226,8 +226,9 @@ impl RpcHandler {
let needs_archy_net = matches!(
package_id,
"bitcoin-knots" | "bitcoin" | "bitcoin-core"
| "lnd"
| "mempool" | "mempool-web" | "mempool-api" | "mempool-electrs" | "electrs" | "mysql-mempool" | "archy-mempool-db" | "archy-mempool-web"
| "btcpay-server" | "btcpayserver" | "archy-btcpay-db"
| "btcpay-server" | "btcpayserver" | "archy-btcpay-db" | "archy-nbxplorer" | "nbxplorer"
| "fedimint" | "fedimint-gateway"
);
@@ -329,12 +330,18 @@ printtoconsole=1\n";
run_args.push(env);
}
// Security: Resource limits (from manifest)
let memory_limit = if package_id == "ollama" { "4g" } else { "2g" };
// Resource limits: per-app memory and CPU
let memory_limit = get_memory_limit(package_id);
let mem_arg = format!("--memory={}", memory_limit);
run_args.push(&mem_arg);
run_args.push("--cpus=2");
// Health check definitions
let health_args = get_health_check_args(package_id);
for arg in &health_args {
run_args.push(arg);
}
// Finally, the image
run_args.push(docker_image);
@@ -1289,6 +1296,148 @@ fn is_readonly_compatible(app_id: &str) -> bool {
)
}
/// Get container health check arguments for podman run.
/// Returns (health-cmd, interval, retries) args to append to run_args.
fn get_health_check_args(app_id: &str) -> Vec<String> {
let (cmd, interval, retries) = match app_id {
"bitcoin" | "bitcoin-core" | "bitcoin-knots" => (
"bitcoin-cli -rpcuser=archipelago -rpcpassword=archipelago123 getblockchaininfo || exit 1",
"30s", "3",
),
"lnd" => (
"lncli getinfo || exit 1",
"30s", "3",
),
"btcpay-server" | "btcpayserver" => (
"curl -sf http://localhost:49392/ || exit 1",
"30s", "3",
),
"mempool-api" => (
"curl -sf http://localhost:8999/api/v1/backend-info || exit 1",
"30s", "3",
),
"mempool" | "mempool-web" | "archy-mempool-web" => (
"curl -sf http://localhost:8080/ || exit 1",
"30s", "3",
),
"mempool-electrs" | "electrs" => (
"curl -sf http://localhost:50001/ || exit 1",
"60s", "3",
),
"nextcloud" => (
"curl -sf http://localhost:80/status.php || exit 1",
"30s", "3",
),
"homeassistant" | "home-assistant" => (
"curl -sf http://localhost:8123/api/ || exit 1",
"30s", "3",
),
"grafana" => (
"curl -sf http://localhost:3000/api/health || exit 1",
"30s", "3",
),
"jellyfin" => (
"curl -sf http://localhost:8096/health || exit 1",
"30s", "3",
),
"vaultwarden" => (
"curl -sf http://localhost:80/alive || exit 1",
"30s", "3",
),
"uptime-kuma" => (
"curl -sf http://localhost:3001/ || exit 1",
"30s", "3",
),
"filebrowser" => (
"curl -sf http://localhost:80/health || exit 1",
"30s", "3",
),
"searxng" => (
"curl -sf http://localhost:8080/ || exit 1",
"30s", "3",
),
"photoprism" => (
"curl -sf http://localhost:2342/api/v1/status || exit 1",
"60s", "3",
),
"immich_server" | "immich" => (
"curl -sf http://localhost:2283/api/server/ping || exit 1",
"30s", "3",
),
"dwn" => (
"curl -sf http://localhost:3000/health || exit 1",
"30s", "3",
),
"portainer" => (
"curl -sf http://localhost:9000/api/status || exit 1",
"30s", "3",
),
"ollama" => (
"curl -sf http://localhost:11434/ || exit 1",
"30s", "3",
),
"fedimint" => (
"curl -sf http://localhost:8174/health || exit 1",
"60s", "3",
),
"nostr-rs-relay" | "nostr-relay" => (
"curl -sf http://localhost:8080/ || exit 1",
"30s", "3",
),
"nginx-proxy-manager" => (
"curl -sf http://localhost:81/api/ || exit 1",
"30s", "3",
),
_ => return vec![],
};
vec![
format!("--health-cmd={}", cmd),
format!("--health-interval={}", interval),
format!("--health-retries={}", retries),
"--health-start-period=60s".to_string(),
]
}
/// Get per-app memory limit.
fn get_memory_limit(app_id: &str) -> &'static str {
match app_id {
// Heavy apps
"bitcoin" | "bitcoin-core" | "bitcoin-knots" => "2g",
"onlyoffice" | "onlyoffice-documentserver" => "2g",
"ollama" => "4g",
// Medium apps
"lnd" => "512m",
"mempool-electrs" | "electrs" => "1g",
"nextcloud" => "1g",
"immich_server" | "immich" => "1g",
"btcpay-server" | "btcpayserver" => "1g",
"homeassistant" | "home-assistant" => "512m",
"fedimint" => "512m",
"fedimint-gateway" => "512m",
"photoprism" => "1g",
// Light apps
"mempool-api" => "512m",
"mempool" | "mempool-web" | "archy-mempool-web" => "256m",
"grafana" => "256m",
"jellyfin" => "1g",
"vaultwarden" => "256m",
"uptime-kuma" => "256m",
"filebrowser" => "256m",
"searxng" => "512m",
"dwn" => "256m",
"portainer" => "256m",
"nostr-rs-relay" | "nostr-relay" => "256m",
"nginx-proxy-manager" => "256m",
// Databases
"archy-btcpay-db" | "archy-mempool-db" | "mysql-mempool" => "512m",
"immich_postgres" | "penpot-postgres" => "256m",
"immich_redis" | "penpot-valkey" => "128m",
// Default
_ => "512m",
}
}
/// Get app-specific configuration
/// Returns: (ports, volumes, env_vars, custom_command, custom_args)
fn get_app_config(

View File

@@ -1,5 +1,5 @@
use super::RpcHandler;
use crate::{node_message, nostr_discovery, peers};
use crate::{federation, node_message, nostr_discovery, peers};
use crate::peers::KnownPeer;
use anyhow::Result;
@@ -61,15 +61,29 @@ impl RpcHandler {
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing message"))?;
// Validate onion is a known peer to prevent SSRF to arbitrary Tor destinations
// Limit message size to 1MB to prevent DoS
if message.len() > 1_048_576 {
anyhow::bail!("Message too large (max 1MB)");
}
// Validate onion is a known peer or federated node to prevent SSRF
let known_peers = peers::load_peers(&self.config.data_dir).await?;
let is_known = known_peers.iter().any(|p| {
let is_known_peer = known_peers.iter().any(|p| {
p.onion == onion || p.onion == format!("{}.onion", onion)
|| format!("{}.onion", p.onion) == onion
});
if !is_known {
let is_known_fed = if !is_known_peer {
let fed_nodes = federation::load_nodes(&self.config.data_dir).await.unwrap_or_default();
fed_nodes.iter().any(|n| {
n.onion == onion || n.onion == format!("{}.onion", onion)
|| format!("{}.onion", n.onion) == onion
})
} else {
false
};
if !is_known_peer && !is_known_fed {
return Err(anyhow::anyhow!(
"Onion address not in known peers list. Add the peer first."
"Onion address not in known peers or federation. Add the peer first."
));
}

View File

@@ -1,5 +1,6 @@
// Container Health Monitor
// Checks container health every 60s, auto-restarts unhealthy containers (max 3 times),
// Checks container health every 60s, auto-restarts unhealthy containers (max 3 times)
// with exponential backoff (10s, 30s, 90s), dependency-aware startup ordering,
// and sends WebSocket notifications to the UI on failure.
use crate::data_model::{Notification, NotificationLevel};
@@ -8,20 +9,67 @@ use crate::webhooks::{self, WebhookEvent};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;
use tracing::{debug, info, warn};
const MAX_RESTART_ATTEMPTS: u32 = 3;
const CHECK_INTERVAL_SECS: u64 = 60;
/// Backoff delays per attempt: 10s, 30s, 90s
const BACKOFF_DELAYS_SECS: [u64; 3] = [10, 30, 90];
/// Reset restart counter after 1 hour of stability
const STABILITY_RESET_SECS: u64 = 3600;
/// Track restart attempts per container to avoid infinite restart loops.
/// Container startup tier for dependency ordering.
/// Lower tiers start first. Containers in the same tier start together.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum StartupTier {
/// Databases: postgres, redis, mariadb, mysql
Database = 0,
/// Core infrastructure: bitcoin-knots, bitcoin-core
CoreInfra = 1,
/// Services depending on core: lnd, electrs, nbxplorer
DependentService = 2,
/// Application layer: mempool-api, btcpay-server, fedimint, nextcloud, etc.
Application = 3,
/// UI/frontend containers: mempool-web, bitcoin-ui, lnd-ui
Frontend = 4,
}
fn container_tier(name: &str) -> StartupTier {
let id = name.strip_prefix("archy-").unwrap_or(name);
match id {
// Tier 0: Databases
"btcpay-db" | "mempool-db" | "penpot-postgres" | "immich_postgres"
| "immich_redis" | "penpot-valkey" | "endurain-db" | "nextcloud-db" => StartupTier::Database,
// Tier 1: Core infrastructure
"bitcoin-knots" | "bitcoin-core" | "bitcoin" => StartupTier::CoreInfra,
// Tier 2: Dependent services
"lnd" | "mempool-electrs" | "electrs" | "nbxplorer" => StartupTier::DependentService,
// Tier 4: Frontend/UI
"mempool-web" | "bitcoin-ui" | "lnd-ui" | "electrs-ui"
| "penpot-frontend" | "penpot-exporter" => StartupTier::Frontend,
// Tier 3: Everything else
_ => StartupTier::Application,
}
}
/// Track restart attempts per container with exponential backoff and stability reset.
struct RestartTracker {
attempts: HashMap<String, u32>,
last_failure: HashMap<String, Instant>,
last_healthy: HashMap<String, Instant>,
}
impl RestartTracker {
fn new() -> Self {
Self {
attempts: HashMap::new(),
last_failure: HashMap::new(),
last_healthy: HashMap::new(),
}
}
@@ -29,17 +77,50 @@ impl RestartTracker {
fn record_attempt(&mut self, name: &str) -> bool {
let count = self.attempts.entry(name.to_string()).or_insert(0);
*count += 1;
self.last_failure.insert(name.to_string(), Instant::now());
*count <= MAX_RESTART_ATTEMPTS
}
/// Clear restart count when a container is healthy again.
fn clear(&mut self, name: &str) {
self.attempts.remove(name);
self.last_failure.remove(name);
self.last_healthy.insert(name.to_string(), Instant::now());
}
fn attempt_count(&self, name: &str) -> u32 {
*self.attempts.get(name).unwrap_or(&0)
}
/// Get the backoff delay in seconds for the current attempt number.
fn backoff_delay_secs(&self, name: &str) -> u64 {
let attempts = self.attempt_count(name);
if attempts == 0 {
return BACKOFF_DELAYS_SECS[0];
}
let idx = (attempts as usize).saturating_sub(1).min(BACKOFF_DELAYS_SECS.len() - 1);
BACKOFF_DELAYS_SECS[idx]
}
/// Check if enough time has passed since last failure for the backoff delay.
fn backoff_elapsed(&self, name: &str) -> bool {
let delay = self.backoff_delay_secs(name);
match self.last_failure.get(name) {
Some(last) => last.elapsed().as_secs() >= delay,
None => true,
}
}
/// Check if a failed container should have its counter reset (1h stability window).
fn should_reset_failed(&self, name: &str) -> bool {
if self.attempt_count(name) < MAX_RESTART_ATTEMPTS {
return false;
}
match self.last_failure.get(name) {
Some(last) => last.elapsed().as_secs() >= STABILITY_RESET_SECS,
None => false,
}
}
}
#[derive(Debug, Clone)]
@@ -152,77 +233,107 @@ pub fn spawn_health_monitor(state: Arc<StateManager>, data_dir: PathBuf) {
continue;
}
// Sort containers by startup tier so databases restart before dependent services
let mut unhealthy: Vec<&ContainerHealth> = Vec::new();
let mut state_changed = false;
let (mut data, _) = state.get_snapshot().await;
for container in &containers {
if container.healthy {
// Clear restart tracker if container recovered
if tracker.attempt_count(&container.name) > 0 {
info!("Container {} is healthy again after restart", container.name);
tracker.clear(&container.name);
}
continue;
}
// Container is unhealthy (exited/stopped)
// Only try auto-restart if we haven't exceeded max attempts
if container.state == "exited" || container.state == "stopped" {
let attempts = tracker.attempt_count(&container.name);
unhealthy.push(container);
}
}
if attempts >= MAX_RESTART_ATTEMPTS {
// Already notified, skip
debug!("Container {} exceeded max restart attempts ({})", container.name, MAX_RESTART_ATTEMPTS);
continue;
// Sort by startup tier: databases first, then core, then dependent, then apps, then UIs
unhealthy.sort_by_key(|c| container_tier(&c.name));
let mut prev_tier: Option<StartupTier> = None;
for container in &unhealthy {
let tier = container_tier(&container.name);
let attempts = tracker.attempt_count(&container.name);
// Reset counter after 1 hour for permanently failed containers
if tracker.should_reset_failed(&container.name) {
info!("Resetting restart counter for {} after {}s stability window", container.name, STABILITY_RESET_SECS);
tracker.clear(&container.name);
}
if tracker.attempt_count(&container.name) >= MAX_RESTART_ATTEMPTS {
debug!("Container {} exceeded max restart attempts ({})", container.name, MAX_RESTART_ATTEMPTS);
continue;
}
// Wait for backoff delay before retrying
if !tracker.backoff_elapsed(&container.name) {
let delay = tracker.backoff_delay_secs(&container.name);
debug!("Container {} waiting for backoff ({}s)", container.name, delay);
continue;
}
// When transitioning to a higher tier, wait briefly for previous tier to stabilize
if let Some(prev) = prev_tier {
if tier > prev {
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
}
}
prev_tier = Some(tier);
if tracker.record_attempt(&container.name) {
let restarted = restart_container(&container.name).await;
let attempt = tracker.attempt_count(&container.name);
if tracker.record_attempt(&container.name) {
let attempt = tracker.attempt_count(&container.name);
info!("Restarting {} (tier {:?}, attempt {}/{}, backoff {}s)",
container.name, tier, attempt, MAX_RESTART_ATTEMPTS,
BACKOFF_DELAYS_SECS.get(attempt.saturating_sub(1) as usize).unwrap_or(&90));
if !restarted || attempt >= MAX_RESTART_ATTEMPTS {
// Push notification to UI
let notification = Notification {
id: format!("health-{}-{}", container.app_id, chrono::Utc::now().timestamp()),
level: NotificationLevel::Error,
title: format!("{} is unhealthy", container.app_id),
message: if restarted {
format!(
"Container restarted ({}/{} attempts). May need manual attention.",
attempt, MAX_RESTART_ATTEMPTS
)
} else {
format!(
"Auto-restart failed (attempt {}/{}). Container state: {}",
attempt, MAX_RESTART_ATTEMPTS, container.state
)
},
timestamp: chrono::Utc::now().to_rfc3339(),
app_id: Some(container.app_id.clone()),
};
let restarted = restart_container(&container.name).await;
// Keep only the latest 20 notifications
data.notifications.push(notification.clone());
if data.notifications.len() > 20 {
data.notifications = data.notifications.split_off(data.notifications.len() - 20);
}
state_changed = true;
if !restarted || attempt >= MAX_RESTART_ATTEMPTS {
let notification = Notification {
id: format!("health-{}-{}", container.app_id, chrono::Utc::now().timestamp()),
level: NotificationLevel::Error,
title: format!("{} is unhealthy", container.app_id),
message: if restarted {
format!(
"Container restarted ({}/{} attempts). May need manual attention.",
attempt, MAX_RESTART_ATTEMPTS
)
} else {
format!(
"Auto-restart failed (attempt {}/{}). Container state: {}",
attempt, MAX_RESTART_ATTEMPTS, container.state
)
},
timestamp: chrono::Utc::now().to_rfc3339(),
app_id: Some(container.app_id.clone()),
};
// Fire-and-forget webhook delivery (checks config internally)
let webhook_payload = webhooks::WebhookPayload {
event: WebhookEvent::ContainerCrash,
title: notification.title,
message: notification.message,
timestamp: notification.timestamp,
node_id: String::new(),
details: Some(serde_json::json!({
"container": container.name,
"app_id": container.app_id,
"state": container.state,
})),
};
webhooks::send_webhook(&data_dir, webhook_payload).await;
data.notifications.push(notification.clone());
if data.notifications.len() > 20 {
data.notifications = data.notifications.split_off(data.notifications.len() - 20);
}
state_changed = true;
let webhook_payload = webhooks::WebhookPayload {
event: WebhookEvent::ContainerCrash,
title: notification.title,
message: notification.message,
timestamp: notification.timestamp,
node_id: String::new(),
details: Some(serde_json::json!({
"container": container.name,
"app_id": container.app_id,
"state": container.state,
"attempt": attempt,
"tier": format!("{:?}", tier),
})),
};
webhooks::send_webhook(&data_dir, webhook_payload).await;
}
}
}
@@ -261,7 +372,6 @@ mod tests {
#[test]
fn test_restart_tracker_max_attempts_exceeded() {
let mut tracker = RestartTracker::new();
// First MAX_RESTART_ATTEMPTS attempts should return true
for i in 1..=MAX_RESTART_ATTEMPTS {
assert!(
tracker.record_attempt("container-a"),
@@ -269,7 +379,6 @@ mod tests {
i
);
}
// Next attempt exceeds max, returns false
assert!(!tracker.record_attempt("container-a"));
assert_eq!(tracker.attempt_count("container-a"), MAX_RESTART_ATTEMPTS + 1);
}
@@ -300,13 +409,11 @@ mod tests {
#[test]
fn test_restart_tracker_clear_allows_new_attempts() {
let mut tracker = RestartTracker::new();
// Exhaust attempts
for _ in 0..=MAX_RESTART_ATTEMPTS {
tracker.record_attempt("container-y");
}
assert!(!tracker.record_attempt("container-y"));
// Clear and try again
tracker.clear("container-y");
assert!(tracker.record_attempt("container-y"));
assert_eq!(tracker.attempt_count("container-y"), 1);
@@ -315,7 +422,6 @@ mod tests {
#[test]
fn test_restart_tracker_clear_nonexistent_is_safe() {
let mut tracker = RestartTracker::new();
// Should not panic
tracker.clear("nonexistent");
assert_eq!(tracker.attempt_count("nonexistent"), 0);
}
@@ -348,7 +454,6 @@ mod tests {
#[test]
fn test_max_restart_attempts_constant() {
// Ensure the constant is a reasonable value (not 0, not too high)
assert!(MAX_RESTART_ATTEMPTS >= 1);
assert!(MAX_RESTART_ATTEMPTS <= 10);
assert_eq!(MAX_RESTART_ATTEMPTS, 3);
@@ -358,4 +463,75 @@ mod tests {
fn test_check_interval_constant() {
assert_eq!(CHECK_INTERVAL_SECS, 60);
}
#[test]
fn test_backoff_delays() {
let tracker = RestartTracker::new();
// Before any attempts, delay is first backoff
assert_eq!(tracker.backoff_delay_secs("test"), BACKOFF_DELAYS_SECS[0]);
}
#[test]
fn test_backoff_delays_escalate() {
let mut tracker = RestartTracker::new();
tracker.record_attempt("test");
assert_eq!(tracker.backoff_delay_secs("test"), BACKOFF_DELAYS_SECS[0]); // 10s
tracker.record_attempt("test");
assert_eq!(tracker.backoff_delay_secs("test"), BACKOFF_DELAYS_SECS[1]); // 30s
tracker.record_attempt("test");
assert_eq!(tracker.backoff_delay_secs("test"), BACKOFF_DELAYS_SECS[2]); // 90s
}
#[test]
fn test_backoff_elapsed_true_for_new() {
let tracker = RestartTracker::new();
assert!(tracker.backoff_elapsed("test"));
}
#[test]
fn test_stability_reset_not_triggered_early() {
let mut tracker = RestartTracker::new();
tracker.record_attempt("test");
assert!(!tracker.should_reset_failed("test"));
}
#[test]
fn test_container_tier_database() {
assert_eq!(container_tier("archy-btcpay-db"), StartupTier::Database);
assert_eq!(container_tier("immich_postgres"), StartupTier::Database);
assert_eq!(container_tier("penpot-valkey"), StartupTier::Database);
}
#[test]
fn test_container_tier_core() {
assert_eq!(container_tier("bitcoin-knots"), StartupTier::CoreInfra);
}
#[test]
fn test_container_tier_dependent() {
assert_eq!(container_tier("lnd"), StartupTier::DependentService);
assert_eq!(container_tier("mempool-electrs"), StartupTier::DependentService);
assert_eq!(container_tier("archy-nbxplorer"), StartupTier::DependentService);
}
#[test]
fn test_container_tier_frontend() {
assert_eq!(container_tier("archy-mempool-web"), StartupTier::Frontend);
assert_eq!(container_tier("archy-bitcoin-ui"), StartupTier::Frontend);
}
#[test]
fn test_container_tier_application_default() {
assert_eq!(container_tier("nextcloud"), StartupTier::Application);
assert_eq!(container_tier("grafana"), StartupTier::Application);
assert_eq!(container_tier("btcpay-server"), StartupTier::Application);
}
#[test]
fn test_tier_ordering() {
assert!(StartupTier::Database < StartupTier::CoreInfra);
assert!(StartupTier::CoreInfra < StartupTier::DependentService);
assert!(StartupTier::DependentService < StartupTier::Application);
assert!(StartupTier::Application < StartupTier::Frontend);
}
}

View File

@@ -312,6 +312,13 @@ impl EndpointRateLimiter {
limits.insert("system.shutdown".to_string(), (2, 300));
// Password changes
limits.insert("auth.changePassword".to_string(), (3, 300));
// Federation join: prevent invite-code brute force
limits.insert("federation.join".to_string(), (5, 60));
limits.insert("federation.invite".to_string(), (10, 300));
// Inter-node federation RPCs (unauthenticated, need stricter limits)
limits.insert("federation.peer-joined".to_string(), (10, 60));
limits.insert("federation.peer-address-changed".to_string(), (10, 60));
limits.insert("federation.get-state".to_string(), (30, 60));
Self {
requests: Arc::new(RwLock::new(HashMap::new())),