release(v1.7.30-alpha): live install/uninstall progress + cleaner pull waterfall

- Backend: unified pull-progress streaming across primary AND fallback
  registries. Earlier code only streamed for the primary attempt; if it
  failed fast (VPS 404, etc.) the UI froze at 0% until the fallback
  finished. The waterfall now uses a single shared helper that streams
  podman stderr through update_install_progress for every URL tried.
- Backend: PackageDataEntry gains uninstall_stage, set at each phase of
  handle_package_uninstall ("Stopping containers (i/total)",
  "Cleaning up volumes", "Removing app data"). State flips to Removing
  during the pipeline.
- Frontend: MarketplaceAppCard renders the live progress bar with byte
  counts during installs, matching the System Update download bar style.
- Frontend: AppCard renders the live uninstall stage label per app.
  Modal closes immediately on confirm so concurrent uninstalls each
  show their own progress on their own card.
- Cleanup: removed dead helpers (image_candidates, rewrite_for_primary,
  primary_image_url, pull_from_registries_with_skip) made unused by
  the install.rs refactor.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dorian
2026-04-21 19:11:36 -04:00
parent 1709149ebd
commit 18f0929614
13 changed files with 221 additions and 209 deletions

2
core/Cargo.lock generated
View File

@@ -80,7 +80,7 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "archipelago"
version = "1.7.29-alpha"
version = "1.7.30-alpha"
dependencies = [
"anyhow",
"archipelago-container",

View File

@@ -1,6 +1,6 @@
[package]
name = "archipelago"
version = "1.7.29-alpha"
version = "1.7.30-alpha"
edition = "2021"
description = "Archipelago Bitcoin Node OS - Native backend"
authors = ["Archipelago Team"]

View File

@@ -635,47 +635,32 @@ impl RpcHandler {
unreachable!()
}
/// Single image pull attempt with progress streaming.
async fn do_pull_image(&self, package_id: &str, docker_image: &str) -> Result<()> {
debug!("Pulling image: {}", docker_image);
self.set_install_progress(package_id, 0, 0).await;
// Set TMPDIR to user-writable location — rootless podman's user namespace
// makes /var/tmp read-only, which causes `podman pull` to fail with
// "mkdir /var/tmp/container_images_storage...: read-only file system"
let user_tmp = format!(
"{}/.local/share/containers/tmp",
std::env::var("HOME").unwrap_or_else(|_| "/home/archipelago".to_string())
);
let _ = std::fs::create_dir_all(&user_tmp);
// Rewrite to the primary registry's URL so the first attempt
// honors the operator's mirror choice (default: VPS) instead of
// blindly using whatever registry the image was hardcoded to.
// If the rewritten URL fails, pull_from_registries_with_skip
// falls through to the other configured registries.
let (primary_url, primary_tls) =
crate::container::registry::primary_image_url(&self.config.data_dir, docker_image)
.await;
if primary_url != docker_image {
debug!("Rewrote {} → {} for primary registry", docker_image, primary_url);
}
let mut pull_args = vec!["pull".to_string(), primary_url.clone()];
if !primary_tls {
/// Pull one image URL with live progress streamed through
/// `update_install_progress`. Returns Ok(true) on a successful pull,
/// Ok(false) on transient failure (so the caller can try the next
/// mirror), Err only for unrecoverable setup errors.
async fn pull_one_url_with_progress(
&self,
url: &str,
tls_verify: bool,
package_id: &str,
user_tmp: &str,
) -> Result<bool> {
let mut pull_args = vec!["pull".to_string(), url.to_string()];
if !tls_verify {
pull_args.push("--tls-verify=false".to_string());
}
let mut child = tokio::process::Command::new("podman")
.args(&pull_args)
.env("TMPDIR", &user_tmp)
.env("TMPDIR", user_tmp)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.context("Failed to start image pull")?;
// Wrap the entire pull (stderr progress + wait) in a 10-minute timeout.
// Large image layers (Minio, Postgres, ffmpeg) can take several minutes
// to pull. 60s was too short and caused premature retries on slow registries.
// 10-minute per-URL budget — large layers (Minio, Postgres,
// ffmpeg) regularly take several minutes and we'd rather wait
// than bounce to the next mirror mid-download.
let pull_result = tokio::time::timeout(std::time::Duration::from_secs(600), async {
if let Some(stderr) = child.stderr.take() {
let reader = BufReader::new(stderr);
@@ -693,50 +678,115 @@ impl RpcHandler {
})
.await;
let primary_failed = match pull_result {
Ok(Ok(status)) => !status.success(),
match pull_result {
Ok(Ok(status)) => Ok(status.success()),
Ok(Err(e)) => {
tracing::warn!("Image pull process error: {}", e);
true
tracing::warn!("Image pull process error on {}: {}", url, e);
Ok(false)
}
Err(_) => {
tracing::warn!("Image pull timed out after 60s: {}", primary_url);
tracing::warn!("Image pull timed out after 600s: {}", url);
let _ = child.kill().await;
let _ = child.wait().await; // reap zombie
true
Ok(false)
}
};
if !primary_failed && primary_url != docker_image {
// Primary pull succeeded but used a rewritten URL. Tag under
// the original image reference so downstream code (images -q,
// run -d docker_image, etc.) finds it.
let _ = tokio::process::Command::new("podman")
.args(["tag", &primary_url, docker_image])
.output()
.await;
tracing::info!("Pulled {} from primary registry ({})", docker_image, primary_url);
}
if primary_failed {
// Primary failed — walk the remaining configured registries.
// Skip primary_url so we don't retry what just failed.
match crate::container::registry::pull_from_registries_with_skip(
&self.config.data_dir,
docker_image,
&user_tmp,
Some(&primary_url),
)
}
/// Pull a container image, trying each configured registry in
/// priority order and streaming progress during every attempt. The
/// primary is tried first; if it doesn't have the image (or returns a 404),
/// the next mirror is tried — with its own progress streaming, so
/// the UI doesn't freeze at 0% after a primary miss. On success the
/// image is tagged under `docker_image` so downstream commands
/// (images -q, run -d, etc.) can find it by its canonical name.
async fn do_pull_image(&self, package_id: &str, docker_image: &str) -> Result<()> {
debug!("Pulling image: {}", docker_image);
self.set_install_progress(package_id, 0, 0).await;
// Set TMPDIR to user-writable location — rootless podman's user namespace
// makes /var/tmp read-only, which causes `podman pull` to fail with
// "mkdir /var/tmp/container_images_storage...: read-only file system"
let user_tmp = format!(
"{}/.local/share/containers/tmp",
std::env::var("HOME").unwrap_or_else(|_| "/home/archipelago".to_string())
);
let _ = std::fs::create_dir_all(&user_tmp);
// Build the ordered candidate list: every enabled registry
// (highest priority first), each rewriting the image URL to its
// own origin. Deduplicate — two registries that happen to share
// a URL should only be tried once.
let config = crate::container::registry::load_registries(&self.config.data_dir)
.await
.unwrap_or_default();
let mut tried: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut candidates: Vec<(String, bool)> = Vec::new();
for reg in config.active_registries() {
let url = config.rewrite_image(docker_image, reg);
if tried.insert(url.clone()) {
candidates.push((url, reg.tls_verify));
}
}
// If no registries are configured, fall back to the literal URL.
if candidates.is_empty() {
candidates.push((docker_image.to_string(), true));
}
// Walk candidates, streaming progress for each attempt.
let mut pulled_url: Option<String> = None;
let attempts = candidates.len();
for (i, (url, tls_verify)) in candidates.iter().enumerate() {
if url != docker_image {
debug!("Attempt {}/{}: {}", i + 1, attempts, url);
} else {
debug!("Attempt {}/{}: {} (literal)", i + 1, attempts, url);
}
// Reset progress at the top of each attempt so the UI reflects
// the fresh pull instead of showing stale bytes from a prior
// partial attempt.
self.set_install_progress(package_id, 0, 0).await;
match self
.pull_one_url_with_progress(url, *tls_verify, package_id, &user_tmp)
.await?
{
Ok(_) => {
tracing::info!("Pulled {} via fallback registry", docker_image);
true => {
tracing::info!("Pulled {} from {}", docker_image, url);
pulled_url = Some(url.clone());
break;
}
Err(e) => {
return Err(anyhow::anyhow!("Image pull failed: {}", e));
false => {
tracing::debug!(
"Pull attempt {}/{} failed for {}, trying next mirror",
i + 1,
attempts,
url
);
continue;
}
}
}
// Verify image exists locally after pull
let Some(pulled_url) = pulled_url else {
return Err(anyhow::anyhow!(
"Image pull failed from all {} configured registries for {}",
attempts,
docker_image
));
};
// Tag under the original docker_image reference if the successful
// pull came from a rewritten URL — downstream code (images -q,
// run -d docker_image, etc.) needs to find it by its canonical
// name regardless of which mirror actually served the bytes.
if pulled_url != docker_image {
let _ = tokio::process::Command::new("podman")
.args(["tag", &pulled_url, docker_image])
.output()
.await;
}
// Verify image exists locally after pull.
let verify = tokio::process::Command::new("podman")
.args(["images", "-q", docker_image])
.output()

View File

@@ -28,6 +28,18 @@ impl RpcHandler {
self.state_manager.update_data(data).await;
}
/// Record a human-readable uninstall stage on a package entry so the UI
/// can display what the pipeline is doing rather than a generic spinner.
///
/// If `package_id` has an entry in the current snapshot, its
/// `uninstall_stage` is set to `stage` and its state is switched to
/// `PackageState::Removing`. The snapshot is written back through
/// `update_data` on every call, whether or not an entry was found, so
/// keep calls coarse-grained.
pub(super) async fn set_uninstall_stage(&self, package_id: &str, stage: &str) {
    let (mut snapshot, _) = self.state_manager.get_snapshot().await;
    if let Some(pkg) = snapshot.package_data.get_mut(package_id) {
        pkg.state = crate::data_model::PackageState::Removing;
        pkg.uninstall_stage = Some(stage.to_owned());
    }
    self.state_manager.update_data(snapshot).await;
}
/// Update install progress (static method for use in async closures).
pub(super) async fn update_install_progress(
state_manager: &crate::state::StateManager,
@@ -81,6 +93,7 @@ fn create_installing_entry(package_id: &str) -> PackageDataEntry {
},
installed: None,
install_progress: None,
uninstall_stage: None,
available_update: None,
}
}

View File

@@ -261,12 +261,28 @@ impl RpcHandler {
if containers_to_remove.is_empty() {
tracing::warn!("Uninstall {}: no containers found", package_id);
}
let total = containers_to_remove.len();
let mut stopped = 0u32;
let mut removed = 0u32;
let mut errors = Vec::new();
for name in &containers_to_remove {
self.set_uninstall_stage(
package_id,
&if total > 0 {
format!("Stopping containers (0/{})", total)
} else {
"Cleaning up".to_string()
},
)
.await;
for (i, name) in containers_to_remove.iter().enumerate() {
self.set_uninstall_stage(
package_id,
&format!("Stopping containers ({}/{})", i + 1, total),
)
.await;
tracing::info!("Uninstall {}: stopping container {}", package_id, name);
let stop_out = tokio::process::Command::new("podman")
.args(["stop", "-t", stop_timeout_secs(name), name])
@@ -326,6 +342,7 @@ impl RpcHandler {
}
}
self.set_uninstall_stage(package_id, "Cleaning up volumes").await;
// Clean up dangling volumes associated with removed containers
let _ = tokio::process::Command::new("podman")
.args(["volume", "prune", "-f"])
@@ -354,6 +371,7 @@ impl RpcHandler {
// Clean data directories unless preserve_data
if !preserve_data {
self.set_uninstall_stage(package_id, "Removing app data").await;
let data_dirs = get_data_dirs_for_app(package_id);
for dir in &data_dirs {
tracing::info!("Uninstall {}: removing data {}", package_id, dir);

View File

@@ -247,6 +247,7 @@ impl DockerPackageScanner {
status: service_status,
}),
install_progress: None,
uninstall_stage: None,
};
packages.insert(app_id.clone(), package);

View File

@@ -57,6 +57,13 @@ impl Default for RegistryConfig {
enabled: true,
priority: 10,
},
Registry {
url: "146.59.87.168:3000/lfg2025".to_string(),
name: "Server 3 (OVH)".to_string(),
tls_verify: false,
enabled: true,
priority: 20,
},
],
}
}
@@ -80,42 +87,6 @@ impl RegistryConfig {
format!("{}/{}", registry.url, image_name)
}
/// Build the list of fallback `(image_url, tls_verify)` pairs to try.
///
/// The image is rewritten once per active registry, in the order
/// `active_registries` yields them. A rewrite that is identical to the
/// original `image` is left out, since that URL has already failed.
pub fn image_candidates(&self, image: &str) -> Vec<(String, bool)> {
    self.active_registries()
        .into_iter()
        .filter_map(|reg| {
            let rewritten = self.rewrite_image(image, reg);
            (rewritten != image).then_some((rewritten, reg.tls_verify))
        })
        .collect()
}
/// Rewrite `image` so it points at the first registry returned by
/// `active_registries`, letting the first pull attempt honor the
/// operator's primary choice rather than whatever registry the image
/// URL was hardcoded to.
///
/// Returns `(rewritten_url, tls_verify)`. When there is no active
/// registry, the original URL is returned unchanged with
/// `tls_verify = true`.
pub fn rewrite_for_primary(&self, image: &str) -> (String, bool) {
    let ranked = self.active_registries();
    let Some(primary) = ranked.first() else {
        return (image.to_string(), true);
    };
    (self.rewrite_image(image, primary), primary.tls_verify)
}
}
/// Convenience wrapper for callers that don't already hold a
/// `RegistryConfig`: loads the registry config from `data_dir` and
/// rewrites `image` for the primary registry.
///
/// If the config cannot be loaded, the original URL is returned
/// unchanged with `tls_verify = true`.
pub async fn primary_image_url(data_dir: &Path, image: &str) -> (String, bool) {
    let Ok(config) = load_registries(data_dir).await else {
        return (image.to_string(), true);
    };
    config.rewrite_for_primary(image)
}
/// Extract the image name from a full image reference.
@@ -155,80 +126,6 @@ pub async fn save_registries(data_dir: &Path, config: &RegistryConfig) -> Result
Ok(())
}
/// Try pulling an image from configured registries in priority order.
/// If `already_tried` is Some, that URL is skipped (avoids retrying the
/// primary when the caller already attempted it with progress streaming).
/// Returns the image reference that succeeded.
pub async fn pull_from_registries_with_skip(
data_dir: &Path,
image: &str,
tmpdir: &str,
already_tried: Option<&str>,
) -> Result<String> {
let config = load_registries(data_dir).await?;
let mut candidates = config.image_candidates(image);
if let Some(skip) = already_tried {
candidates.retain(|(url, _)| url != skip);
}
for (candidate, tls_verify) in &candidates {
debug!("Trying registry: {}", candidate);
let mut args = vec!["pull".to_string(), candidate.clone()];
if !tls_verify {
args.push("--tls-verify=false".to_string());
}
let mut child = tokio::process::Command::new("podman")
.args(&args)
.env("TMPDIR", tmpdir)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.spawn()
.ok();
let status = if let Some(ref mut c) = child {
match tokio::time::timeout(std::time::Duration::from_secs(120), c.wait()).await {
Ok(Ok(s)) => Some(s.success()),
_ => {
let _ = c.kill().await;
let _ = c.wait().await;
debug!("Fallback pull timed out: {}", candidate);
None
}
}
} else {
None
};
if status == Some(true) {
// If we pulled from a non-original registry, tag it with the original name
if candidate != image {
let _ = tokio::process::Command::new("podman")
.args(["tag", candidate, image])
.status()
.await;
info!(
"Pulled {} from fallback registry, tagged as {}",
candidate, image
);
} else {
info!("Pulled {} from primary registry", image);
}
return Ok(candidate.clone());
}
debug!("Failed to pull from {}", candidate);
}
Err(anyhow::anyhow!(
"Failed to pull {} from all {} configured registries",
image,
candidates.len()
))
}
#[cfg(test)]
mod tests {
use super::*;
@@ -259,39 +156,20 @@ mod tests {
);
}
#[test]
fn test_image_candidates() {
    let original = "git.tx1138.com/lfg2025/lnd:v0.18.4-beta";
    let fallbacks = RegistryConfig::default().image_candidates(original);
    // The tx1138 rewrite equals the original URL and is dropped, so only
    // the VPS rewrite is expected to remain.
    assert_eq!(fallbacks.len(), 1);
    // The VPS rewrite is first in the candidate list.
    assert_eq!(fallbacks[0].0, "23.182.128.160:3000/lfg2025/lnd:v0.18.4-beta");
}
#[test]
fn test_rewrite_for_primary_uses_top_priority() {
    // The default config's top-priority registry should supply both the
    // rewritten host and the TLS setting.
    let (rewritten, verify_tls) =
        RegistryConfig::default().rewrite_for_primary("git.tx1138.com/lfg2025/lnd:v0.18.4-beta");
    assert_eq!(rewritten, "23.182.128.160:3000/lfg2025/lnd:v0.18.4-beta");
    assert!(!verify_tls, "VPS primary is HTTP — tls_verify should be false");
}
#[test]
fn test_active_registries_sorted() {
let config = RegistryConfig::default();
let active = config.active_registries();
assert_eq!(active.len(), 2);
assert_eq!(active.len(), 3);
assert!(active[0].priority <= active[1].priority);
assert!(active[1].priority <= active[2].priority);
}
#[tokio::test]
async fn test_load_default() {
let tmp = TempDir::new().unwrap();
let config = load_registries(tmp.path()).await.unwrap();
assert_eq!(config.registries.len(), 2);
assert_eq!(config.registries.len(), 3);
}
#[tokio::test]
@@ -307,6 +185,6 @@ mod tests {
});
save_registries(tmp.path(), &config).await.unwrap();
let loaded = load_registries(tmp.path()).await.unwrap();
assert_eq!(loaded.registries.len(), 3);
assert_eq!(loaded.registries.len(), 4);
}
}

View File

@@ -138,6 +138,13 @@ pub struct PackageDataEntry {
pub installed: Option<InstalledPackageDataEntry>,
#[serde(rename = "install-progress")]
pub install_progress: Option<InstallProgress>,
/// Live label describing the current uninstall step ("Stopping
/// containers (2/5)", "Removing app data", …). Set by the uninstall
/// pipeline so the UI can show real progress instead of a generic
/// "Uninstalling…" spinner. Cleared after the package entry is
/// removed.
#[serde(rename = "uninstall-stage", skip_serializing_if = "Option::is_none", default)]
pub uninstall_stage: Option<String>,
/// Pinned image version from image-versions.sh when it differs from running version
#[serde(rename = "available-update", skip_serializing_if = "Option::is_none")]
pub available_update: Option<String>,