feat(server): lazy-bind FIPS peer listener so fips.install doesn't need an archipelago restart

Previously the server checked `fips0` once at startup; if the interface wasn't up (pre-onboarding, or post-onboarding before the user clicked Activate FIPS), the peer listener never bound and stayed unreachable until the next archipelago restart.

Replaced with a `peer_late_bind_loop` background task: it polls every 30s for an fd00::/8 address on `fips0` and binds the listener the moment one appears. The first tick fires immediately, so the hot path — fips0 already up at startup — is still zero-cost. Cancellation cascades through the same `tokio::sync::watch` channel the main listener uses.

Side effects:
- main.rs no longer computes peer_addr eagerly; dropped the unused param from serve_with_shutdown.
- FipsTransport::is_available already caches the service probe, so the 30s poll doesn't thrash systemctl.

Covers task #21. Unblocks the first-boot + onboarding flow for fresh ISO installs on .253.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -160,18 +160,9 @@ async fn main() -> Result<()> {
|
||||
.parse()
|
||||
.context("Invalid bind address")?;
|
||||
|
||||
// If the FIPS daemon has brought up `fips0` with a ULA address, bind a
|
||||
// second listener there for peer-to-peer traffic. The peer listener
|
||||
// applies a path whitelist (see server::is_peer_allowed_path) so FIPS
|
||||
// peers can only reach signed peer endpoints, not internal surfaces.
|
||||
// No address → no peer listener (fresh install pre-onboarding, fips
|
||||
// service down, etc.); peers fall through to Tor until next restart.
|
||||
let peer_addr: Option<SocketAddr> = fips::iface::fips0_ula().map(|ip| {
|
||||
SocketAddr::new(std::net::IpAddr::V6(ip), fips::dial::PEER_PORT)
|
||||
});
|
||||
if let Some(pa) = peer_addr {
|
||||
info!("FIPS peer listener will bind {}", pa);
|
||||
}
|
||||
// The FIPS peer listener is bound lazily by server::serve_with_shutdown
|
||||
// on a 30s poll of fips0 — so a post-onboarding fips.install brings it
|
||||
// online without needing an archipelago restart.
|
||||
|
||||
// Spawn background update scheduler
|
||||
let update_data_dir = config.data_dir.clone();
|
||||
@@ -223,7 +214,7 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
};
|
||||
|
||||
server.serve_with_shutdown(addr, peer_addr, shutdown).await?;
|
||||
server.serve_with_shutdown(addr, shutdown).await?;
|
||||
|
||||
// Clean shutdown: remove PID marker so next startup doesn't trigger recovery
|
||||
crash_recovery::remove_pid_marker(&config.data_dir).await;
|
||||
|
||||
@@ -407,17 +407,18 @@ impl Server {
|
||||
/// Serve with a graceful shutdown signal.
|
||||
///
|
||||
/// `main_addr` is the primary listener (historically `127.0.0.1:5678`).
|
||||
/// `peer_addr` is an optional second listener bound to the `fips0` ULA
|
||||
/// — when present, connections on that listener are subjected to the
|
||||
/// peer path whitelist ([`is_peer_allowed_path`]) so FIPS peers can
|
||||
/// reach only the signed peer-to-peer endpoints, not internal surfaces.
|
||||
/// The main listener always comes up on `main_addr`. The FIPS peer
|
||||
/// listener (path-filtered, bound to `fips0`'s ULA) is managed by a
|
||||
/// late-binding task that polls every 30s: if fips0 isn't up at
|
||||
/// startup (pre-onboarding install, legacy node pre-fips.install),
|
||||
/// it keeps trying until the interface appears — no archipelago
|
||||
/// restart required after the user activates FIPS.
|
||||
///
|
||||
/// When `shutdown` completes, both listeners stop accepting and drain
|
||||
/// in-flight requests (bounded by `DRAIN_TIMEOUT`).
|
||||
pub async fn serve_with_shutdown(
|
||||
&self,
|
||||
main_addr: SocketAddr,
|
||||
peer_addr: Option<SocketAddr>,
|
||||
shutdown: impl std::future::Future<Output = ()>,
|
||||
) -> Result<()> {
|
||||
let active_connections = Arc::new(tokio::sync::Semaphore::new(1024));
|
||||
@@ -432,28 +433,13 @@ impl Server {
|
||||
main_addr,
|
||||
));
|
||||
|
||||
let peer_task = if let Some(addr) = peer_addr {
|
||||
let listener = match TcpListener::bind(addr).await {
|
||||
Ok(l) => l,
|
||||
Err(e) => {
|
||||
warn!("FIPS peer listener bind to {} failed: {} — peers unreachable over FIPS until restart", addr, e);
|
||||
let _ = tx.send(true);
|
||||
main_task.await.ok();
|
||||
return Err(e.into());
|
||||
}
|
||||
};
|
||||
info!("FIPS peer listener bound to {}", addr);
|
||||
Some(tokio::spawn(accept_loop(
|
||||
self.api_handler.clone(),
|
||||
listener,
|
||||
active_connections.clone(),
|
||||
true, // peer listener: apply path filter
|
||||
tx.subscribe(),
|
||||
addr,
|
||||
)))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
// Peer listener: late-binding so we don't need an archipelago
|
||||
// restart when fips0 comes up after onboarding.
|
||||
let peer_task = tokio::spawn(peer_late_bind_loop(
|
||||
self.api_handler.clone(),
|
||||
active_connections.clone(),
|
||||
tx.subscribe(),
|
||||
));
|
||||
|
||||
shutdown.await;
|
||||
info!("Shutdown signal received, draining connections...");
|
||||
@@ -471,15 +457,61 @@ impl Server {
|
||||
}
|
||||
|
||||
let _ = main_task.await;
|
||||
if let Some(t) = peer_task {
|
||||
let _ = t.await;
|
||||
}
|
||||
let _ = peer_task.await;
|
||||
|
||||
info!("Shutdown complete");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Poll every 30s for `fips0`'s ULA; when it appears, bind the peer
|
||||
/// listener and run the normal accept loop. If the bind fails (port
|
||||
/// already taken, permissions), log and keep retrying. Returns on
|
||||
/// shutdown. First tick fires immediately so the hot path for
|
||||
/// already-up fips0 is still zero-cost.
|
||||
async fn peer_late_bind_loop(
|
||||
handler: Arc<ApiHandler>,
|
||||
active_connections: Arc<tokio::sync::Semaphore>,
|
||||
mut shutdown_rx: tokio::sync::watch::Receiver<bool>,
|
||||
) {
|
||||
let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
|
||||
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = interval.tick() => {
|
||||
let Some(ip) = crate::fips::iface::fips0_ula() else { continue };
|
||||
let addr = SocketAddr::new(
|
||||
std::net::IpAddr::V6(ip),
|
||||
crate::fips::dial::PEER_PORT,
|
||||
);
|
||||
let listener = match TcpListener::bind(addr).await {
|
||||
Ok(l) => l,
|
||||
Err(e) => {
|
||||
warn!("FIPS peer listener bind {} failed: {} — retrying in 30s", addr, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
info!("FIPS peer listener bound {}", addr);
|
||||
// Once bound, serve until shutdown fires. accept_loop
|
||||
// returns on shutdown, which also ends this outer loop.
|
||||
accept_loop(
|
||||
handler,
|
||||
listener,
|
||||
active_connections,
|
||||
true, // peer listener: apply path filter
|
||||
shutdown_rx,
|
||||
addr,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
_ = shutdown_rx.changed() => {
|
||||
if *shutdown_rx.borrow() { return; }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Whitelist of HTTP paths reachable via the peer-facing (FIPS) listener.
|
||||
/// Every entry is an endpoint already protected by cryptographic auth
|
||||
/// (ed25519 signature verification inside the handler, federation DID
|
||||
|
||||
Reference in New Issue
Block a user