From 84943aaa04024df9a205986c319105185113a9e3 Mon Sep 17 00:00:00 2001 From: Dorian Date: Sun, 19 Apr 2026 04:21:20 -0400 Subject: [PATCH] feat(server): lazy-bind FIPS peer listener so fips.install doesn't need an archipelago restart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the server checked `fips0` once at startup; if the interface wasn't up (pre-onboarding, or post-onboarding before the user clicked Activate FIPS), the peer listener never bound and stayed unreachable until the next archipelago restart. Replaced with a `peer_late_bind_loop` background task: polls every 30s for an fd00::/8 address on `fips0` and binds the listener the moment one appears. First tick fires immediately so the hot path — fips0 already up at startup — is still zero-cost. Cancellation cascades through the same `tokio::sync::watch` channel the main listener uses. Side effects: - main.rs no longer computes peer_addr eagerly; dropped the unused param from serve_with_shutdown. - FipsTransport::is_available already caches the service probe so the 30s poll doesn't thrash systemctl. Covers task #21. Unblocks the first-boot + onboarding flow for fresh ISO installs on .253. Co-Authored-By: Claude Opus 4.7 (1M context) --- core/archipelago/src/main.rs | 17 ++----- core/archipelago/src/server.rs | 92 +++++++++++++++++++++++----------- 2 files changed, 66 insertions(+), 43 deletions(-) diff --git a/core/archipelago/src/main.rs b/core/archipelago/src/main.rs index f1390fe1..9d7dacea 100644 --- a/core/archipelago/src/main.rs +++ b/core/archipelago/src/main.rs @@ -160,18 +160,9 @@ async fn main() -> Result<()> { .parse() .context("Invalid bind address")?; - // If the FIPS daemon has brought up `fips0` with a ULA address, bind a - // second listener there for peer-to-peer traffic. The peer listener - // applies a path whitelist (see server::is_peer_allowed_path) so FIPS - // peers can only reach signed peer endpoints, not internal surfaces. 
- // No address → no peer listener (fresh install pre-onboarding, fips - // service down, etc.); peers fall through to Tor until next restart. - let peer_addr: Option = fips::iface::fips0_ula().map(|ip| { - SocketAddr::new(std::net::IpAddr::V6(ip), fips::dial::PEER_PORT) - }); - if let Some(pa) = peer_addr { - info!("FIPS peer listener will bind {}", pa); - } + // The FIPS peer listener is bound lazily by server::serve_with_shutdown + // on a 30s poll of fips0 — so a post-onboarding fips.install brings it + // online without needing an archipelago restart. // Spawn background update scheduler let update_data_dir = config.data_dir.clone(); @@ -223,7 +214,7 @@ async fn main() -> Result<()> { } }; - server.serve_with_shutdown(addr, peer_addr, shutdown).await?; + server.serve_with_shutdown(addr, shutdown).await?; // Clean shutdown: remove PID marker so next startup doesn't trigger recovery crash_recovery::remove_pid_marker(&config.data_dir).await; diff --git a/core/archipelago/src/server.rs b/core/archipelago/src/server.rs index fe61e093..742d7c6b 100644 --- a/core/archipelago/src/server.rs +++ b/core/archipelago/src/server.rs @@ -407,17 +407,18 @@ impl Server { /// Serve with a graceful shutdown signal. /// /// `main_addr` is the primary listener (historically `127.0.0.1:5678`). - /// `peer_addr` is an optional second listener bound to the `fips0` ULA - /// — when present, connections on that listener are subjected to the - /// peer path whitelist ([`is_peer_allowed_path`]) so FIPS peers can - /// reach only the signed peer-to-peer endpoints, not internal surfaces. + /// The main listener always comes up on `main_addr`. The FIPS peer + /// listener (path-filtered, bound to `fips0`'s ULA) is managed by a + /// late-binding task that polls every 30s: if fips0 isn't up at + /// startup (pre-onboarding install, legacy node pre-fips.install), + /// it keeps trying until the interface appears — no archipelago + /// restart required after the user activates FIPS. 
/// /// When `shutdown` completes, both listeners stop accepting and drain /// in-flight requests (bounded by `DRAIN_TIMEOUT`). pub async fn serve_with_shutdown( &self, main_addr: SocketAddr, - peer_addr: Option, shutdown: impl std::future::Future, ) -> Result<()> { let active_connections = Arc::new(tokio::sync::Semaphore::new(1024)); @@ -432,28 +433,13 @@ impl Server { main_addr, )); - let peer_task = if let Some(addr) = peer_addr { - let listener = match TcpListener::bind(addr).await { - Ok(l) => l, - Err(e) => { - warn!("FIPS peer listener bind to {} failed: {} — peers unreachable over FIPS until restart", addr, e); - let _ = tx.send(true); - main_task.await.ok(); - return Err(e.into()); - } - }; - info!("FIPS peer listener bound to {}", addr); - Some(tokio::spawn(accept_loop( - self.api_handler.clone(), - listener, - active_connections.clone(), - true, // peer listener: apply path filter - tx.subscribe(), - addr, - ))) - } else { - None - }; + // Peer listener: late-binding so we don't need an archipelago + // restart when fips0 comes up after onboarding. + let peer_task = tokio::spawn(peer_late_bind_loop( + self.api_handler.clone(), + active_connections.clone(), + tx.subscribe(), + )); shutdown.await; info!("Shutdown signal received, draining connections..."); @@ -471,15 +457,61 @@ impl Server { } let _ = main_task.await; - if let Some(t) = peer_task { - let _ = t.await; - } + let _ = peer_task.await; info!("Shutdown complete"); Ok(()) } } +/// Poll every 30s for `fips0`'s ULA; when it appears, bind the peer +/// listener and run the normal accept loop. If the bind fails (port +/// already taken, permissions), log and keep retrying. Returns on +/// shutdown. First tick fires immediately so the hot path for +/// already-up fips0 is still zero-cost. 
+async fn peer_late_bind_loop(
+    handler: Arc,
+    active_connections: Arc,
+    mut shutdown_rx: tokio::sync::watch::Receiver,
+) {
+    let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
+    interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+    loop {
+        tokio::select! {
+            _ = interval.tick() => {
+                let Some(ip) = crate::fips::iface::fips0_ula() else { continue };
+                let addr = SocketAddr::new(
+                    std::net::IpAddr::V6(ip),
+                    crate::fips::dial::PEER_PORT,
+                );
+                let listener = match TcpListener::bind(addr).await {
+                    Ok(l) => l,
+                    Err(e) => {
+                        warn!("FIPS peer listener bind {} failed: {} — retrying in 30s", addr, e);
+                        continue;
+                    }
+                };
+                info!("FIPS peer listener bound {}", addr);
+                // Once bound, serve until accept_loop returns on shutdown, ending this task too.
+                accept_loop(
+                    handler,
+                    listener,
+                    active_connections,
+                    true, // peer listener: apply path filter
+                    shutdown_rx,
+                    addr,
+                )
+                .await;
+                return;
+            }
+            changed = shutdown_rx.changed() => {
+                // Err = sender dropped; exit instead of busy-looping on a closed channel.
+                if changed.is_err() || *shutdown_rx.borrow() { return; }
+            }
+        }
+    }
+}
+
 /// Whitelist of HTTP paths reachable via the peer-facing (FIPS) listener.
 /// Every entry is an endpoint already protected by cryptographic auth
 /// (ed25519 signature verification inside the handler, federation DID