fix(install): kick scanner post-install so Launch button appears immediately

After install completes, the async-spawn wrapper wrote state=Running
but the skeletal install-time manifest (interfaces: None) persisted
until the next scheduled 60s scan. The frontend saw state=running but
hasUI=false and hid the Launch button for up to a full minute.

Add a shared Notify/watch pair between RpcHandler and the scan loop:
  - scan_kick (Notify): scan loop selects! between the 60s interval
    and this notify, running immediately on either.
  - scan_tick (watch<u64>): scan loop bumps the counter after each
    completed scan so callers can await completion.

Install and update success paths now call kick_scanner_and_wait before
flipping to Running. The scan merges via merge_preserving_transitional
(state stays Installing/Updating, manifest refreshed from live podman
with interfaces.main.ui populated from real port bindings). 2s timeout
falls back to pre-fix behavior on slow podman — no regression.
This commit is contained in:
archipelago
2026-04-23 07:59:03 -04:00
parent 8cc84ebcb7
commit f86d86c354
3 changed files with 84 additions and 2 deletions

View File

@@ -87,6 +87,15 @@ pub struct RpcHandler {
/// Our own Ed25519 pubkey hex — needed by ContentRef senders for cap scoping
/// and by ContentRef receivers to request caps scoped to themselves.
pub(crate) self_pubkey_hex: Arc<tokio::sync::RwLock<Option<String>>>,
/// Kick the package scanner to run immediately (bypassing the 60s interval).
/// Used by install/update success paths so the fresh manifest (with populated
/// `interfaces.main.ui`) lands before we flip state to Running — closes the
/// "Launch button is missing for up to 60s after install" UX gap.
pub(crate) scan_kick: Arc<tokio::sync::Notify>,
/// Monotonic counter incremented by the scan loop after each completed scan.
/// Install/update success paths subscribe to this to know when a kicked scan
/// has actually finished before flipping to the terminal state.
pub(crate) scan_tick: Arc<tokio::sync::watch::Sender<u64>>,
}
impl RpcHandler {
@@ -144,6 +153,8 @@ impl RpcHandler {
transport_router: Arc::new(tokio::sync::RwLock::new(None)),
blob_store: Arc::new(tokio::sync::RwLock::new(None)),
self_pubkey_hex: Arc::new(tokio::sync::RwLock::new(None)),
scan_kick: Arc::new(tokio::sync::Notify::new()),
scan_tick: Arc::new(tokio::sync::watch::channel(0u64).0),
})
}
@@ -184,6 +195,21 @@ impl RpcHandler {
Arc::clone(&self.mesh_service)
}
/// Shared Notify handle the package-scanner loop waits on (in addition to
/// its periodic tick). Install/update success paths call `notify_one()` to
/// trigger an immediate scan so the fresh manifest lands before we flip to
/// the terminal Running state.
pub fn scan_kick(&self) -> Arc<tokio::sync::Notify> {
Arc::clone(&self.scan_kick)
}
/// Sender half of the scan-completion watch channel. The scanner bumps this
/// counter after every finished scan; install/update wait for an advance
/// after kicking so they know the fresh manifest has landed.
pub fn scan_tick(&self) -> Arc<tokio::sync::watch::Sender<u64>> {
Arc::clone(&self.scan_tick)
}
fn cookie_suffix_for_request(&self, headers: &hyper::header::HeaderMap) -> &'static str {
// Only set Secure flag when the original request was over HTTPS.
// Nginx sends X-Forwarded-Proto: https for HTTPS connections.

View File

@@ -91,6 +91,13 @@ impl RpcHandler {
info!("package.install {}: complete", package_id_spawn);
// The install pipeline has verified the container is up
// and healthy (see install.rs post-start exit check).
// Kick the scanner first so the fresh manifest (with
// `interfaces.main.ui` from the live port binding) lands
// BEFORE we flip to Running — without this the Launch
// button is missing for up to 60s after a successful
// install, because the skeletal install-time manifest
// has `interfaces: None`.
kick_scanner_and_wait(&handler).await;
// We MUST explicitly transition out of Installing here:
// `merge_preserving_transitional` in the package-scan
// loop treats Installing as RPC-owned and refuses to
@@ -259,6 +266,10 @@ impl RpcHandler {
// stuck at Updating forever. The update pipeline has
// already verified the new container is running via its
// post-recreate check.
// Kick the scanner first so any manifest changes from the
// new image version (interfaces, ports, etc.) land before
// we flip to Running.
kick_scanner_and_wait(&handler).await;
set_package_state(
&handler.state_manager,
&package_id_spawn,
@@ -406,3 +417,34 @@ async fn remove_package_entry(state_manager: &StateManager, package_id: &str) {
state_manager.update_data(data).await;
}
}
/// Kick the container scanner to run immediately and wait for it to finish
/// (with a 2s timeout). Used by install/update success paths so the fresh
/// manifest — with `interfaces.main.ui` populated from the now-running
/// container's port binding — lands BEFORE we flip state to Running.
///
/// Without this, the frontend sees `state = running` but the skeletal
/// install-time manifest (interfaces = None), and hides the Launch button
/// for up to the full 60s scan interval.
///
/// The scan merges via `merge_preserving_transitional`, which keeps
/// state = Installing (we haven't flipped yet) while taking the fresh
/// manifest. After this returns, the caller writes Running on top of the
/// now-populated manifest.
async fn kick_scanner_and_wait(handler: &RpcHandler) {
let mut rx = handler.scan_tick.subscribe();
let start = *rx.borrow_and_update();
handler.scan_kick.notify_one();
// 2s is well above a typical podman scan (~200ms on .228, ~500ms worst
// case). If it times out we proceed anyway — the next 60s scan will
// self-heal and the worst case is the pre-fix behavior (Launch button
// appears a bit late).
let _ = tokio::time::timeout(std::time::Duration::from_secs(2), async {
while *rx.borrow_and_update() == start {
if rx.changed().await.is_err() {
break;
}
}
})
.await;
}

View File

@@ -313,6 +313,8 @@ impl Server {
let scanner = create_docker_scanner(&config).await?;
let state = state_manager.clone();
let identity_clone = identity.clone();
let scan_kick = api_handler.rpc_handler().scan_kick();
let scan_tick = api_handler.rpc_handler().scan_tick();
// Initial scan (delayed to let crash recovery finish first)
tokio::spawn(async move {
@@ -339,8 +341,14 @@ impl Server {
{
error!("Failed to scan containers: {}", e);
}
// Bump the scan-completion counter so any caller waiting on a
// kicked scan (install/update success path) can proceed.
scan_tick.send_modify(|n| *n = n.wrapping_add(1));
// Periodic scan every 60 seconds (only broadcasts if state changed)
// Periodic scan every 60 seconds (only broadcasts if state changed).
// Also wakes immediately when `scan_kick` fires — install/update
// success paths poke it so the fresh manifest (with populated
// interfaces) lands before they flip state to Running.
// Uses an in-flight guard to skip scans when a previous one is still running
let mut interval = tokio::time::interval(Duration::from_secs(60));
// Skip missed ticks instead of catching up — prevents burst of scans
@@ -348,7 +356,12 @@ impl Server {
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
let scanning = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
loop {
interval.tick().await;
tokio::select! {
_ = interval.tick() => {}
_ = scan_kick.notified() => {
debug!("Scan kicked by install/update success — running immediately");
}
}
if scanning.load(std::sync::atomic::Ordering::Relaxed) {
debug!("Skipping container scan — previous scan still in progress");
continue;
@@ -365,6 +378,7 @@ impl Server {
{
error!("Failed to update containers: {}", e);
}
scan_tick.send_modify(|n| *n = n.wrapping_add(1));
scanning.store(false, std::sync::atomic::Ordering::Relaxed);
}
});