feat(server): lazy-bind FIPS peer listener so fips.install doesn't

need an archipelago restart

Previously the server checked `fips0` once at startup; if the
interface wasn't up (pre-onboarding, or post-onboarding before the
user clicked Activate FIPS), the peer listener never bound and stayed
unreachable until the next archipelago restart.

Replaced with a `peer_late_bind_loop` background task: polls every
30s for an fd00::/8 address on `fips0` and binds the listener on the
first poll that sees one. The first tick fires immediately, so the
common case — fips0 already up at startup — binds with no added
delay. Cancellation cascades through the same `tokio::sync::watch`
channel the main listener uses.

Side effects:
- main.rs no longer computes peer_addr eagerly; dropped the unused
  param from serve_with_shutdown.
- FipsTransport::is_available already caches the service probe so
  the 30s poll doesn't thrash systemctl.

Covers task #21. Unblocks the first-boot + onboarding flow for
fresh ISO installs on .253.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dorian
2026-04-19 04:21:20 -04:00
parent bfe2603f69
commit 84943aaa04
2 changed files with 66 additions and 43 deletions

View File

@@ -160,18 +160,9 @@ async fn main() -> Result<()> {
.parse()
.context("Invalid bind address")?;
// If the FIPS daemon has brought up `fips0` with a ULA address, bind a
// second listener there for peer-to-peer traffic. The peer listener
// applies a path whitelist (see server::is_peer_allowed_path) so FIPS
// peers can only reach signed peer endpoints, not internal surfaces.
// No address → no peer listener (fresh install pre-onboarding, fips
// service down, etc.); peers fall through to Tor until next restart.
let peer_addr: Option<SocketAddr> = fips::iface::fips0_ula().map(|ip| {
SocketAddr::new(std::net::IpAddr::V6(ip), fips::dial::PEER_PORT)
});
if let Some(pa) = peer_addr {
info!("FIPS peer listener will bind {}", pa);
}
// The FIPS peer listener is bound lazily by server::serve_with_shutdown
// on a 30s poll of fips0 — so a post-onboarding fips.install brings it
// online without needing an archipelago restart.
// Spawn background update scheduler
let update_data_dir = config.data_dir.clone();
@@ -223,7 +214,7 @@ async fn main() -> Result<()> {
}
};
server.serve_with_shutdown(addr, peer_addr, shutdown).await?;
server.serve_with_shutdown(addr, shutdown).await?;
// Clean shutdown: remove PID marker so next startup doesn't trigger recovery
crash_recovery::remove_pid_marker(&config.data_dir).await;

View File

@@ -407,17 +407,18 @@ impl Server {
/// Serve with a graceful shutdown signal.
///
/// `main_addr` is the primary listener (historically `127.0.0.1:5678`).
/// `peer_addr` is an optional second listener bound to the `fips0` ULA
/// — when present, connections on that listener are subjected to the
/// peer path whitelist ([`is_peer_allowed_path`]) so FIPS peers can
/// reach only the signed peer-to-peer endpoints, not internal surfaces.
/// The main listener always comes up on `main_addr`. The FIPS peer
/// listener (path-filtered, bound to `fips0`'s ULA) is managed by a
/// late-binding task that polls every 30s: if fips0 isn't up at
/// startup (pre-onboarding install, legacy node pre-fips.install),
/// it keeps trying until the interface appears — no archipelago
/// restart required after the user activates FIPS.
///
/// When `shutdown` completes, both listeners stop accepting and drain
/// in-flight requests (bounded by `DRAIN_TIMEOUT`).
pub async fn serve_with_shutdown(
&self,
main_addr: SocketAddr,
peer_addr: Option<SocketAddr>,
shutdown: impl std::future::Future<Output = ()>,
) -> Result<()> {
let active_connections = Arc::new(tokio::sync::Semaphore::new(1024));
@@ -432,28 +433,13 @@ impl Server {
main_addr,
));
let peer_task = if let Some(addr) = peer_addr {
let listener = match TcpListener::bind(addr).await {
Ok(l) => l,
Err(e) => {
warn!("FIPS peer listener bind to {} failed: {} — peers unreachable over FIPS until restart", addr, e);
let _ = tx.send(true);
main_task.await.ok();
return Err(e.into());
}
};
info!("FIPS peer listener bound to {}", addr);
Some(tokio::spawn(accept_loop(
self.api_handler.clone(),
listener,
active_connections.clone(),
true, // peer listener: apply path filter
tx.subscribe(),
addr,
)))
} else {
None
};
// Peer listener: late-binding so we don't need an archipelago
// restart when fips0 comes up after onboarding.
let peer_task = tokio::spawn(peer_late_bind_loop(
self.api_handler.clone(),
active_connections.clone(),
tx.subscribe(),
));
shutdown.await;
info!("Shutdown signal received, draining connections...");
@@ -471,15 +457,61 @@ impl Server {
}
let _ = main_task.await;
if let Some(t) = peer_task {
let _ = t.await;
}
let _ = peer_task.await;
info!("Shutdown complete");
Ok(())
}
}
/// Late-binds the FIPS peer listener.
///
/// Polls every 30s for `fips0`'s ULA; when one appears, binds a listener
/// on `<ula>:PEER_PORT` and hands it to the normal `accept_loop` with the
/// peer path filter enabled. The interval's first tick completes
/// immediately, so when `fips0` is already up at startup the bind happens
/// without waiting a full period.
///
/// * No ULA yet → wait for the next tick.
/// * Bind fails (port already taken, permissions) → log a warning and
///   retry on the next tick.
/// * Shutdown signalled (`true` on `shutdown_rx`) **or** the shutdown
///   sender dropped → return.
///
/// Once a listener is bound this function returns as soon as
/// `accept_loop` returns; it does not go back to polling.
async fn peer_late_bind_loop(
    handler: Arc<ApiHandler>,
    active_connections: Arc<tokio::sync::Semaphore>,
    mut shutdown_rx: tokio::sync::watch::Receiver<bool>,
) {
    let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
    // If a tick is missed, push the schedule back rather than firing a
    // burst of catch-up ticks.
    interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
    loop {
        tokio::select! {
            _ = interval.tick() => {
                let Some(ip) = crate::fips::iface::fips0_ula() else { continue };
                let addr = SocketAddr::new(
                    std::net::IpAddr::V6(ip),
                    crate::fips::dial::PEER_PORT,
                );
                let listener = match TcpListener::bind(addr).await {
                    Ok(l) => l,
                    Err(e) => {
                        warn!("FIPS peer listener bind {} failed: {} — retrying in 30s", addr, e);
                        continue;
                    }
                };
                info!("FIPS peer listener bound {}", addr);
                // Once bound, serve until accept_loop returns (it is
                // handed the shutdown receiver), then end this task.
                accept_loop(
                    handler,
                    listener,
                    active_connections,
                    true, // peer listener: apply path filter
                    shutdown_rx,
                    addr,
                )
                .await;
                return;
            }
            changed = shutdown_rx.changed() => {
                // `changed()` yields Err once the sender has been dropped,
                // and keeps yielding it immediately on every later poll.
                // Treat that as shutdown; otherwise this loop would spin
                // at full speed and never exit.
                if changed.is_err() || *shutdown_rx.borrow() { return; }
            }
        }
    }
}
/// Whitelist of HTTP paths reachable via the peer-facing (FIPS) listener.
/// Every entry is an endpoint already protected by cryptographic auth
/// (ed25519 signature verification inside the handler, federation DID