diff --git a/core/Cargo.lock b/core/Cargo.lock index 31ee70c6..f7173bb5 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -80,7 +80,7 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "archipelago" -version = "1.7.14-alpha" +version = "1.7.15-alpha" dependencies = [ "anyhow", "archipelago-container", diff --git a/core/archipelago/Cargo.toml b/core/archipelago/Cargo.toml index 9e22dec3..0b7c3181 100644 --- a/core/archipelago/Cargo.toml +++ b/core/archipelago/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "archipelago" -version = "1.7.14-alpha" +version = "1.7.15-alpha" edition = "2021" description = "Archipelago Bitcoin Node OS - Native backend" authors = ["Archipelago Team"] diff --git a/core/archipelago/src/api/rpc/update.rs b/core/archipelago/src/api/rpc/update.rs index 277f27b6..f2e70751 100644 --- a/core/archipelago/src/api/rpc/update.rs +++ b/core/archipelago/src/api/rpc/update.rs @@ -157,6 +157,16 @@ impl RpcHandler { /// Get update status without checking remote. pub(super) async fn handle_update_status(&self) -> Result { let state = update::get_status(&self.config.data_dir).await?; + // Expose live download progress so the UI can resume the + // progress bar after navigation instead of showing the fake + // creep again. An RPC poll every ~1s during download drives a + // real progress indicator that survives route changes. + let downloaded = update::DOWNLOAD_BYTES + .load(std::sync::atomic::Ordering::Relaxed); + let total = update::DOWNLOAD_TOTAL + .load(std::sync::atomic::Ordering::Relaxed); + let active = total > 0 && downloaded < total; + let completed = total > 0 && downloaded >= total; Ok(serde_json::json!({ "current_version": state.current_version, @@ -164,6 +174,13 @@ impl RpcHandler { "update_available": state.available_update.is_some(), "update_in_progress": state.update_in_progress, "rollback_available": state.rollback_available, + "download_progress": if active || completed { + Some(serde_json::json!({ + "bytes_downloaded": downloaded, + "total_bytes": total, + "active": active, + })) + } else { None }, })) } diff --git a/core/archipelago/src/update.rs b/core/archipelago/src/update.rs index 3fa9d1be..41ee5c11 100644 --- a/core/archipelago/src/update.rs +++ b/core/archipelago/src/update.rs @@ -4,9 +4,17 @@ use anyhow::{Context, Result}; use chrono::Timelike; use serde::{Deserialize, Serialize}; use std::path::Path; +use std::sync::atomic::{AtomicU64, Ordering}; use tokio::fs; use tracing::{debug, info}; +/// Live download progress counters. Updated by download_component_resumable +/// as bytes arrive and read by the update.status RPC so the UI can show +/// a real progress bar instead of a fake creep. Global because the +/// download runs in one place at a time; no need for per-handler state. +pub static DOWNLOAD_BYTES: AtomicU64 = AtomicU64::new(0); +pub static DOWNLOAD_TOTAL: AtomicU64 = AtomicU64::new(0); + const DEFAULT_UPDATE_MANIFEST_URL: &str = "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/manifest.json"; const UPDATE_STATE_FILE: &str = "update_state.json"; @@ -111,36 +119,57 @@ pub async fn check_for_updates(data_dir: &Path) -> Result { let mut state = load_state(data_dir).await?; info!("Checking for updates..."); + // 45s total budget, and we retry up to 3 times so a momentary + // gitea hiccup doesn't make the node report "up to date" when an + // update actually exists. Short per-attempt timeout keeps the RPC + // responsive in the common case. let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(15)) + .connect_timeout(std::time::Duration::from_secs(10)) .build() .context("Failed to create HTTP client")?; let manifest_url = update_manifest_url(); - match client.get(&manifest_url).send().await { - Ok(resp) if resp.status().is_success() => { - let manifest: UpdateManifest = resp - .json() - .await - .context("Failed to parse update manifest")?; - - if manifest.version != state.current_version { - info!( - current = %state.current_version, - available = %manifest.version, - "Update available" - ); - state.available_update = Some(manifest); - } else { - debug!("Already on latest version: {}", state.current_version); - state.available_update = None; + let mut last_err: Option = None; + let mut handled = false; + for attempt in 1..=3u8 { + if attempt > 1 { + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + } + match client.get(&manifest_url).send().await { + Ok(resp) if resp.status().is_success() => { + match resp.json::().await { + Ok(manifest) => { + if manifest.version != state.current_version { + info!( + current = %state.current_version, + available = %manifest.version, + "Update available" + ); + state.available_update = Some(manifest); + } else { + debug!("Already on latest version: {}", state.current_version); + state.available_update = None; + } + handled = true; + break; + } + Err(e) => { + last_err = Some(format!("parse: {}", e)); + } + } + } + Ok(resp) => { + last_err = Some(format!("HTTP {}", resp.status())); + } + Err(e) => { + last_err = Some(e.to_string()); } } - Ok(resp) => { - debug!("Update check returned status: {}", resp.status()); - } - Err(e) => { - debug!("Update check failed (offline?): {}", e); + } + if !handled { + if let Some(e) = last_err { + debug!("Update check failed after retries: {}", e); } } @@ -163,6 +192,14 @@ pub async fn dismiss_update(data_dir: &Path) -> Result<()> { /// Download update components to a staging directory. /// Verifies SHA256 hash for each component. +/// +/// Robustness: each component download is **resumable** via HTTP Range +/// requests and retried up to 6 times with exponential backoff. When +/// gitea drops the connection mid-stream (happens regularly at slow +/// raw-file throughput), the next attempt picks up where the previous +/// one left off instead of restarting from byte zero. SHA256 is +/// verified over the complete file at the end of each component, so a +/// partially-corrupt resume still fails cleanly. pub async fn download_update(data_dir: &Path) -> Result { let state = load_state(data_dir).await?; let manifest = state @@ -176,11 +213,8 @@ pub async fn download_update(data_dir: &Path) -> Result { .context("Failed to create staging dir")?; let client = reqwest::Client::builder() - // 1h per component — the bundled frontend+aiui tarball sits at - // ~160 MB and git.tx1138.com raw serves at ~70 KB/s which puts - // the worst case above the old 30 min cap. A larger timeout - // with a tight connect_timeout keeps hung connections from - // swallowing the whole budget. + // Per-request budget; each attempt gets the full hour. A retry + // restarts the budget cleanly. .timeout(std::time::Duration::from_secs(3600)) .connect_timeout(std::time::Duration::from_secs(30)) .build() @@ -189,49 +223,20 @@ pub async fn download_update(data_dir: &Path) -> Result { let mut downloaded = 0u64; let total_bytes: u64 = manifest.components.iter().map(|c| c.size_bytes).sum(); + // Seed the live counters so polls during the handshake show the + // right denominator immediately instead of 0/0 → NaN%. + DOWNLOAD_TOTAL.store(total_bytes, Ordering::Relaxed); + DOWNLOAD_BYTES.store(0, Ordering::Relaxed); + for component in &manifest.components { info!(name = %component.name, url = %component.download_url, "Downloading component"); - - let resp = client - .get(&component.download_url) - .send() - .await - .with_context(|| format!("Failed to download {}", component.name))?; - - if !resp.status().is_success() { - anyhow::bail!( - "Download failed for {}: HTTP {}", - component.name, - resp.status() - ); - } - - let bytes = resp - .bytes() - .await - .with_context(|| format!("Failed to read {}", component.name))?; - - // Verify SHA256 - use sha2::{Digest, Sha256}; - let hash = hex::encode(Sha256::digest(&bytes)); - if hash != component.sha256 { - anyhow::bail!( - "SHA256 mismatch for {}: expected {}, got {}", - component.name, - component.sha256, - hash - ); - } - let dest = staging_dir.join(&component.name); - fs::write(&dest, &bytes) - .await - .with_context(|| format!("Failed to write {}", component.name))?; - + download_component_resumable(&client, component, &dest, downloaded).await?; downloaded += component.size_bytes; + DOWNLOAD_BYTES.store(downloaded, Ordering::Relaxed); info!( name = %component.name, - bytes = bytes.len(), + bytes = component.size_bytes, "Component downloaded and verified" ); } @@ -249,6 +254,166 @@ pub async fn download_update(data_dir: &Path) -> Result { }) } +/// Download a single component to `dest`, resuming from the end of +/// any existing partial file via a Range request. Retries up to 6 +/// times with exponential backoff (5s, 15s, 30s, 60s, 120s, 180s). +/// Verifies the SHA256 over the full file at the end. +async fn download_component_resumable( + client: &reqwest::Client, + component: &ComponentUpdate, + dest: &Path, + prior_total: u64, +) -> Result<()> { + use sha2::{Digest, Sha256}; + use tokio::io::AsyncWriteExt; + const MAX_ATTEMPTS: u32 = 6; + const BACKOFFS: [u64; 5] = [5, 15, 30, 60, 120]; + + let mut last_err: Option = None; + for attempt in 1..=MAX_ATTEMPTS { + let existing_len = match tokio::fs::metadata(dest).await { + Ok(m) => m.len(), + Err(_) => 0, + }; + if existing_len >= component.size_bytes { + // File is already complete — break out and go verify. + break; + } + if attempt > 1 { + let delay = BACKOFFS[(attempt as usize - 2).min(BACKOFFS.len() - 1)]; + tracing::warn!( + name = %component.name, + attempt, + resume_at = existing_len, + "Retrying download in {}s (previous error: {})", + delay, + last_err.as_ref().map(|e| e.to_string()).unwrap_or_default() + ); + tokio::time::sleep(std::time::Duration::from_secs(delay)).await; + } + + let mut req = client.get(&component.download_url); + if existing_len > 0 { + req = req.header("Range", format!("bytes={}-", existing_len)); + } + let resp = match req.send().await { + Ok(r) => r, + Err(e) => { + last_err = Some(anyhow::anyhow!(e)); + continue; + } + }; + let status = resp.status(); + // 200 OK on a fresh start, 206 Partial Content on a resume + // that the server honoured. Anything else is a problem. + let is_resume = existing_len > 0 && status == reqwest::StatusCode::PARTIAL_CONTENT; + let is_fresh = existing_len == 0 && status.is_success(); + let server_ignored_range = existing_len > 0 && status == reqwest::StatusCode::OK; + if !is_resume && !is_fresh && !server_ignored_range { + last_err = Some(anyhow::anyhow!( + "HTTP {} for {} (resume offset {})", + status, + component.name, + existing_len + )); + continue; + } + // If the server ignored Range (returned 200 with the full + // body), wipe the partial file and start over. + let mut file = if server_ignored_range { + let _ = tokio::fs::remove_file(dest).await; + tokio::fs::OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(dest) + .await + .context("open staging file")? + } else if is_resume { + tokio::fs::OpenOptions::new() + .append(true) + .open(dest) + .await + .context("open staging file for append")? + } else { + tokio::fs::OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(dest) + .await + .context("open staging file")? + }; + + let mut resp = resp; + let mut stream_err = false; + let mut on_disk = existing_len; + loop { + match resp.chunk().await { + Ok(Some(bytes)) => { + if let Err(e) = file.write_all(&bytes).await { + last_err = Some(anyhow::anyhow!(e).context("writing chunk")); + stream_err = true; + break; + } + on_disk += bytes.len() as u64; + DOWNLOAD_BYTES.store( + prior_total + on_disk.min(component.size_bytes), + Ordering::Relaxed, + ); + } + Ok(None) => break, // stream ended cleanly + Err(e) => { + last_err = Some(anyhow::anyhow!(e).context("reading chunk")); + stream_err = true; + break; + } + } + } + let _ = file.flush().await; + let _ = file.sync_all().await; + drop(file); + if stream_err { + continue; + } + + // Stream ended cleanly. If we've got the expected size, verify + // the SHA and succeed. Otherwise loop to resume from the new + // offset on the next attempt. + let final_len = tokio::fs::metadata(dest) + .await + .map(|m| m.len()) + .unwrap_or(0); + if final_len < component.size_bytes { + last_err = Some(anyhow::anyhow!( + "download truncated: got {} of {} bytes", + final_len, + component.size_bytes + )); + continue; + } + + // Full file — verify hash. + let bytes = tokio::fs::read(dest) + .await + .context("read staging file for hash check")?; + let hash = hex::encode(Sha256::digest(&bytes)); + if hash == component.sha256 { + return Ok(()); + } + // SHA mismatch — the file on disk is garbage. Nuke it and + // start over from scratch on the next attempt. + let _ = tokio::fs::remove_file(dest).await; + last_err = Some(anyhow::anyhow!( + "SHA256 mismatch for {}: expected {}, got {}", + component.name, + component.sha256, + hash + )); + } + Err(last_err.unwrap_or_else(|| anyhow::anyhow!("download failed without a captured error"))) +} + /// Run a command as root, but *outside* the archipelago service's /// restricted mount namespace. /// diff --git a/neode-ui/src/views/SystemUpdate.vue b/neode-ui/src/views/SystemUpdate.vue index f7bed4ee..585cfdbb 100644 --- a/neode-ui/src/views/SystemUpdate.vue +++ b/neode-ui/src/views/SystemUpdate.vue @@ -324,6 +324,29 @@ const statusMessage = ref('') const statusIsError = ref(false) const downloadPercent = ref(0) const downloadPercentFormatted = computed(() => downloadPercent.value.toFixed(2)) + +// Poll the backend for the real bytes_downloaded / total_bytes so the +// progress bar tracks actual download state (and survives route +// changes). Returns true if a download is currently in progress. +async function pollDownloadProgress(): Promise { + try { + const res = await rpcClient.call<{ + download_progress?: { + bytes_downloaded: number + total_bytes: number + active: boolean + } | null + }>({ method: 'update.status' }) + const p = res.download_progress + if (p && p.total_bytes > 0) { + downloadPercent.value = Math.min(100, (p.bytes_downloaded / p.total_bytes) * 100) + return p.active + } + return false + } catch { + return false + } +} // Shown next to the progress bar when the fake increment has maxed out // at 95% but the real RPC hasn't returned yet — lets the user know the // UI hasn't frozen while SHA verification and disk writes finish. @@ -486,14 +509,12 @@ async function downloadUpdate() { downloadPercent.value = 0 statusMessage.value = '' - // Simulate incremental progress while waiting for the RPC. Capped at - // 95% so the bar never shows >100% before the real completion jumps it - // to 100 — previously the random increment could overshoot. - const progressInterval = setInterval(() => { - if (downloadPercent.value < 95) { - downloadPercent.value = Math.min(95, downloadPercent.value + Math.random() * 3) - } - }, 500) + // Poll the backend's real byte counter every second instead of + // faking progress. The backend exposes bytes_downloaded/total_bytes + // via update.status, updated per chunk. This also means the bar + // resumes correctly after navigating away and back — no more + // "95% for some time" mystery. + const progressInterval = setInterval(() => { void pollDownloadProgress() }, 1000) try { const res = await rpcClient.call<{ @@ -616,8 +637,24 @@ async function setSchedule(value: ScheduleValue) { } } -onMounted(() => { - Promise.all([loadStatus(), loadSchedule(), checkForUpdates()]) +onMounted(async () => { + await Promise.all([loadStatus(), loadSchedule(), checkForUpdates()]) + // If a download was already running when the user navigated here + // (or refreshed), pick up the progress bar where it is and keep + // polling until the backend reports done. No RPC call to start the + // download — the backend's already running it. + const active = await pollDownloadProgress() + if (active) { + downloading.value = true + const resumeInterval = setInterval(async () => { + const stillActive = await pollDownloadProgress() + if (!stillActive) { + clearInterval(resumeInterval) + downloading.value = false + downloaded.value = true + } + }, 1000) + } })