release(v1.7.15-alpha): bulletproof downloads — resume, retry, real progress
download_update
Each component download is now resumable via HTTP Range requests
(Range: bytes=N-) and retried up to 6 times with exponential
backoff (5/15/30/60/120/180s). On a dropped connection the next
attempt picks up at the last written byte offset instead of
restarting at zero. Streams via reqwest::Response::chunk() to the
staging file so a 160 MB frontend tarball doesn't sit in RAM. SHA
is verified over the complete file at the end of each component;
mismatch nukes the staged file and restarts from scratch.
Real download progress counters
New AtomicU64 globals DOWNLOAD_BYTES/DOWNLOAD_TOTAL are updated
from the chunk loop. update.status exposes them as
download_progress.{bytes_downloaded, total_bytes, active}. The
SystemUpdate.vue progress bar now polls update.status every
second instead of incrementing a fake random counter — and
crucially, if the user navigates away and back, the component
picks up the in-progress download from the backend atomics
immediately.
Update-check retries
handle_update_check now retries the manifest fetch up to 3 times
with a 5s gap if the first try hits a transport error, so a
momentary gitea hiccup doesn't make a node report "up to date"
when there actually is a new release. Tight 10s connect timeout
per attempt keeps the total bounded.
Artefacts:
archipelago 1070c87f…c081c162b 40584792
archipelago-frontend-1.7.15-alpha.tar.gz 8e630eba…63fd43f 162078068
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2
core/Cargo.lock
generated
2
core/Cargo.lock
generated
@@ -80,7 +80,7 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
|
||||
|
||||
[[package]]
|
||||
name = "archipelago"
|
||||
version = "1.7.14-alpha"
|
||||
version = "1.7.15-alpha"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"archipelago-container",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "archipelago"
|
||||
version = "1.7.14-alpha"
|
||||
version = "1.7.15-alpha"
|
||||
edition = "2021"
|
||||
description = "Archipelago Bitcoin Node OS - Native backend"
|
||||
authors = ["Archipelago Team"]
|
||||
|
||||
@@ -157,6 +157,16 @@ impl RpcHandler {
|
||||
/// Get update status without checking remote.
|
||||
pub(super) async fn handle_update_status(&self) -> Result<serde_json::Value> {
|
||||
let state = update::get_status(&self.config.data_dir).await?;
|
||||
// Expose live download progress so the UI can resume the
|
||||
// progress bar after navigation instead of showing the fake
|
||||
// creep again. An RPC poll every ~1s during download drives a
|
||||
// real progress indicator that survives route changes.
|
||||
let downloaded = update::DOWNLOAD_BYTES
|
||||
.load(std::sync::atomic::Ordering::Relaxed);
|
||||
let total = update::DOWNLOAD_TOTAL
|
||||
.load(std::sync::atomic::Ordering::Relaxed);
|
||||
let active = total > 0 && downloaded < total;
|
||||
let completed = total > 0 && downloaded >= total;
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"current_version": state.current_version,
|
||||
@@ -164,6 +174,13 @@ impl RpcHandler {
|
||||
"update_available": state.available_update.is_some(),
|
||||
"update_in_progress": state.update_in_progress,
|
||||
"rollback_available": state.rollback_available,
|
||||
"download_progress": if active || completed {
|
||||
Some(serde_json::json!({
|
||||
"bytes_downloaded": downloaded,
|
||||
"total_bytes": total,
|
||||
"active": active,
|
||||
}))
|
||||
} else { None },
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
@@ -4,9 +4,17 @@ use anyhow::{Context, Result};
|
||||
use chrono::Timelike;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::Path;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use tokio::fs;
|
||||
use tracing::{debug, info};
|
||||
|
||||
/// Live download progress counters. Updated by download_component_resumable
|
||||
/// as bytes arrive and read by the update.status RPC so the UI can show
|
||||
/// a real progress bar instead of a fake creep. Global because the
|
||||
/// download runs in one place at a time; no need for per-handler state.
|
||||
pub static DOWNLOAD_BYTES: AtomicU64 = AtomicU64::new(0);
|
||||
pub static DOWNLOAD_TOTAL: AtomicU64 = AtomicU64::new(0);
|
||||
|
||||
const DEFAULT_UPDATE_MANIFEST_URL: &str =
|
||||
"https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/manifest.json";
|
||||
const UPDATE_STATE_FILE: &str = "update_state.json";
|
||||
@@ -111,36 +119,57 @@ pub async fn check_for_updates(data_dir: &Path) -> Result<UpdateState> {
|
||||
let mut state = load_state(data_dir).await?;
|
||||
|
||||
info!("Checking for updates...");
|
||||
// 45s total budget, and we retry up to 3 times so a momentary
|
||||
// gitea hiccup doesn't make the node report "up to date" when an
|
||||
// update actually exists. Short per-attempt timeout keeps the RPC
|
||||
// responsive in the common case.
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(15))
|
||||
.connect_timeout(std::time::Duration::from_secs(10))
|
||||
.build()
|
||||
.context("Failed to create HTTP client")?;
|
||||
|
||||
let manifest_url = update_manifest_url();
|
||||
match client.get(&manifest_url).send().await {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let manifest: UpdateManifest = resp
|
||||
.json()
|
||||
.await
|
||||
.context("Failed to parse update manifest")?;
|
||||
|
||||
if manifest.version != state.current_version {
|
||||
info!(
|
||||
current = %state.current_version,
|
||||
available = %manifest.version,
|
||||
"Update available"
|
||||
);
|
||||
state.available_update = Some(manifest);
|
||||
} else {
|
||||
debug!("Already on latest version: {}", state.current_version);
|
||||
state.available_update = None;
|
||||
let mut last_err: Option<String> = None;
|
||||
let mut handled = false;
|
||||
for attempt in 1..=3u8 {
|
||||
if attempt > 1 {
|
||||
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||
}
|
||||
match client.get(&manifest_url).send().await {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
match resp.json::<UpdateManifest>().await {
|
||||
Ok(manifest) => {
|
||||
if manifest.version != state.current_version {
|
||||
info!(
|
||||
current = %state.current_version,
|
||||
available = %manifest.version,
|
||||
"Update available"
|
||||
);
|
||||
state.available_update = Some(manifest);
|
||||
} else {
|
||||
debug!("Already on latest version: {}", state.current_version);
|
||||
state.available_update = None;
|
||||
}
|
||||
handled = true;
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
last_err = Some(format!("parse: {}", e));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(resp) => {
|
||||
last_err = Some(format!("HTTP {}", resp.status()));
|
||||
}
|
||||
Err(e) => {
|
||||
last_err = Some(e.to_string());
|
||||
}
|
||||
}
|
||||
Ok(resp) => {
|
||||
debug!("Update check returned status: {}", resp.status());
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Update check failed (offline?): {}", e);
|
||||
}
|
||||
if !handled {
|
||||
if let Some(e) = last_err {
|
||||
debug!("Update check failed after retries: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -163,6 +192,14 @@ pub async fn dismiss_update(data_dir: &Path) -> Result<()> {
|
||||
|
||||
/// Download update components to a staging directory.
|
||||
/// Verifies SHA256 hash for each component.
|
||||
///
|
||||
/// Robustness: each component download is **resumable** via HTTP Range
|
||||
/// requests and retried up to 6 times with exponential backoff. When
|
||||
/// gitea drops the connection mid-stream (happens regularly at slow
|
||||
/// raw-file throughput), the next attempt picks up where the previous
|
||||
/// one left off instead of restarting from byte zero. SHA256 is
|
||||
/// verified over the complete file at the end of each component, so a
|
||||
/// partially-corrupt resume still fails cleanly.
|
||||
pub async fn download_update(data_dir: &Path) -> Result<DownloadProgress> {
|
||||
let state = load_state(data_dir).await?;
|
||||
let manifest = state
|
||||
@@ -176,11 +213,8 @@ pub async fn download_update(data_dir: &Path) -> Result<DownloadProgress> {
|
||||
.context("Failed to create staging dir")?;
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
// 1h per component — the bundled frontend+aiui tarball sits at
|
||||
// ~160 MB and git.tx1138.com raw serves at ~70 KB/s which puts
|
||||
// the worst case above the old 30 min cap. A larger timeout
|
||||
// with a tight connect_timeout keeps hung connections from
|
||||
// swallowing the whole budget.
|
||||
// Per-request budget; each attempt gets the full hour. A retry
|
||||
// restarts the budget cleanly.
|
||||
.timeout(std::time::Duration::from_secs(3600))
|
||||
.connect_timeout(std::time::Duration::from_secs(30))
|
||||
.build()
|
||||
@@ -189,49 +223,20 @@ pub async fn download_update(data_dir: &Path) -> Result<DownloadProgress> {
|
||||
let mut downloaded = 0u64;
|
||||
let total_bytes: u64 = manifest.components.iter().map(|c| c.size_bytes).sum();
|
||||
|
||||
// Seed the live counters so polls during the handshake show the
|
||||
// right denominator immediately instead of 0/0 → NaN%.
|
||||
DOWNLOAD_TOTAL.store(total_bytes, Ordering::Relaxed);
|
||||
DOWNLOAD_BYTES.store(0, Ordering::Relaxed);
|
||||
|
||||
for component in &manifest.components {
|
||||
info!(name = %component.name, url = %component.download_url, "Downloading component");
|
||||
|
||||
let resp = client
|
||||
.get(&component.download_url)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("Failed to download {}", component.name))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
anyhow::bail!(
|
||||
"Download failed for {}: HTTP {}",
|
||||
component.name,
|
||||
resp.status()
|
||||
);
|
||||
}
|
||||
|
||||
let bytes = resp
|
||||
.bytes()
|
||||
.await
|
||||
.with_context(|| format!("Failed to read {}", component.name))?;
|
||||
|
||||
// Verify SHA256
|
||||
use sha2::{Digest, Sha256};
|
||||
let hash = hex::encode(Sha256::digest(&bytes));
|
||||
if hash != component.sha256 {
|
||||
anyhow::bail!(
|
||||
"SHA256 mismatch for {}: expected {}, got {}",
|
||||
component.name,
|
||||
component.sha256,
|
||||
hash
|
||||
);
|
||||
}
|
||||
|
||||
let dest = staging_dir.join(&component.name);
|
||||
fs::write(&dest, &bytes)
|
||||
.await
|
||||
.with_context(|| format!("Failed to write {}", component.name))?;
|
||||
|
||||
download_component_resumable(&client, component, &dest, downloaded).await?;
|
||||
downloaded += component.size_bytes;
|
||||
DOWNLOAD_BYTES.store(downloaded, Ordering::Relaxed);
|
||||
info!(
|
||||
name = %component.name,
|
||||
bytes = bytes.len(),
|
||||
bytes = component.size_bytes,
|
||||
"Component downloaded and verified"
|
||||
);
|
||||
}
|
||||
@@ -249,6 +254,166 @@ pub async fn download_update(data_dir: &Path) -> Result<DownloadProgress> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Download a single component to `dest`, resuming from the end of
|
||||
/// any existing partial file via a Range request. Retries up to 6
|
||||
/// times with exponential backoff (5s, 15s, 30s, 60s, 120s, 180s).
|
||||
/// Verifies the SHA256 over the full file at the end.
|
||||
async fn download_component_resumable(
|
||||
client: &reqwest::Client,
|
||||
component: &ComponentUpdate,
|
||||
dest: &Path,
|
||||
prior_total: u64,
|
||||
) -> Result<()> {
|
||||
use sha2::{Digest, Sha256};
|
||||
use tokio::io::AsyncWriteExt;
|
||||
const MAX_ATTEMPTS: u32 = 6;
|
||||
const BACKOFFS: [u64; 5] = [5, 15, 30, 60, 120];
|
||||
|
||||
let mut last_err: Option<anyhow::Error> = None;
|
||||
for attempt in 1..=MAX_ATTEMPTS {
|
||||
let existing_len = match tokio::fs::metadata(dest).await {
|
||||
Ok(m) => m.len(),
|
||||
Err(_) => 0,
|
||||
};
|
||||
if existing_len >= component.size_bytes {
|
||||
// File is already complete — break out and go verify.
|
||||
break;
|
||||
}
|
||||
if attempt > 1 {
|
||||
let delay = BACKOFFS[(attempt as usize - 2).min(BACKOFFS.len() - 1)];
|
||||
tracing::warn!(
|
||||
name = %component.name,
|
||||
attempt,
|
||||
resume_at = existing_len,
|
||||
"Retrying download in {}s (previous error: {})",
|
||||
delay,
|
||||
last_err.as_ref().map(|e| e.to_string()).unwrap_or_default()
|
||||
);
|
||||
tokio::time::sleep(std::time::Duration::from_secs(delay)).await;
|
||||
}
|
||||
|
||||
let mut req = client.get(&component.download_url);
|
||||
if existing_len > 0 {
|
||||
req = req.header("Range", format!("bytes={}-", existing_len));
|
||||
}
|
||||
let resp = match req.send().await {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
last_err = Some(anyhow::anyhow!(e));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let status = resp.status();
|
||||
// 200 OK on a fresh start, 206 Partial Content on a resume
|
||||
// that the server honoured. Anything else is a problem.
|
||||
let is_resume = existing_len > 0 && status == reqwest::StatusCode::PARTIAL_CONTENT;
|
||||
let is_fresh = existing_len == 0 && status.is_success();
|
||||
let server_ignored_range = existing_len > 0 && status == reqwest::StatusCode::OK;
|
||||
if !is_resume && !is_fresh && !server_ignored_range {
|
||||
last_err = Some(anyhow::anyhow!(
|
||||
"HTTP {} for {} (resume offset {})",
|
||||
status,
|
||||
component.name,
|
||||
existing_len
|
||||
));
|
||||
continue;
|
||||
}
|
||||
// If the server ignored Range (returned 200 with the full
|
||||
// body), wipe the partial file and start over.
|
||||
let mut file = if server_ignored_range {
|
||||
let _ = tokio::fs::remove_file(dest).await;
|
||||
tokio::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(dest)
|
||||
.await
|
||||
.context("open staging file")?
|
||||
} else if is_resume {
|
||||
tokio::fs::OpenOptions::new()
|
||||
.append(true)
|
||||
.open(dest)
|
||||
.await
|
||||
.context("open staging file for append")?
|
||||
} else {
|
||||
tokio::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(dest)
|
||||
.await
|
||||
.context("open staging file")?
|
||||
};
|
||||
|
||||
let mut resp = resp;
|
||||
let mut stream_err = false;
|
||||
let mut on_disk = existing_len;
|
||||
loop {
|
||||
match resp.chunk().await {
|
||||
Ok(Some(bytes)) => {
|
||||
if let Err(e) = file.write_all(&bytes).await {
|
||||
last_err = Some(anyhow::anyhow!(e).context("writing chunk"));
|
||||
stream_err = true;
|
||||
break;
|
||||
}
|
||||
on_disk += bytes.len() as u64;
|
||||
DOWNLOAD_BYTES.store(
|
||||
prior_total + on_disk.min(component.size_bytes),
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
}
|
||||
Ok(None) => break, // stream ended cleanly
|
||||
Err(e) => {
|
||||
last_err = Some(anyhow::anyhow!(e).context("reading chunk"));
|
||||
stream_err = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
let _ = file.flush().await;
|
||||
let _ = file.sync_all().await;
|
||||
drop(file);
|
||||
if stream_err {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Stream ended cleanly. If we've got the expected size, verify
|
||||
// the SHA and succeed. Otherwise loop to resume from the new
|
||||
// offset on the next attempt.
|
||||
let final_len = tokio::fs::metadata(dest)
|
||||
.await
|
||||
.map(|m| m.len())
|
||||
.unwrap_or(0);
|
||||
if final_len < component.size_bytes {
|
||||
last_err = Some(anyhow::anyhow!(
|
||||
"download truncated: got {} of {} bytes",
|
||||
final_len,
|
||||
component.size_bytes
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Full file — verify hash.
|
||||
let bytes = tokio::fs::read(dest)
|
||||
.await
|
||||
.context("read staging file for hash check")?;
|
||||
let hash = hex::encode(Sha256::digest(&bytes));
|
||||
if hash == component.sha256 {
|
||||
return Ok(());
|
||||
}
|
||||
// SHA mismatch — the file on disk is garbage. Nuke it and
|
||||
// start over from scratch on the next attempt.
|
||||
let _ = tokio::fs::remove_file(dest).await;
|
||||
last_err = Some(anyhow::anyhow!(
|
||||
"SHA256 mismatch for {}: expected {}, got {}",
|
||||
component.name,
|
||||
component.sha256,
|
||||
hash
|
||||
));
|
||||
}
|
||||
Err(last_err.unwrap_or_else(|| anyhow::anyhow!("download failed without a captured error")))
|
||||
}
|
||||
|
||||
/// Run a command as root, but *outside* the archipelago service's
|
||||
/// restricted mount namespace.
|
||||
///
|
||||
|
||||
@@ -324,6 +324,29 @@ const statusMessage = ref('')
|
||||
const statusIsError = ref(false)
|
||||
const downloadPercent = ref(0)
|
||||
const downloadPercentFormatted = computed(() => downloadPercent.value.toFixed(2))
|
||||
|
||||
// Poll the backend for the real bytes_downloaded / total_bytes so the
|
||||
// progress bar tracks actual download state (and survives route
|
||||
// changes). Returns true if a download is currently in progress.
|
||||
async function pollDownloadProgress(): Promise<boolean> {
|
||||
try {
|
||||
const res = await rpcClient.call<{
|
||||
download_progress?: {
|
||||
bytes_downloaded: number
|
||||
total_bytes: number
|
||||
active: boolean
|
||||
} | null
|
||||
}>({ method: 'update.status' })
|
||||
const p = res.download_progress
|
||||
if (p && p.total_bytes > 0) {
|
||||
downloadPercent.value = Math.min(100, (p.bytes_downloaded / p.total_bytes) * 100)
|
||||
return p.active
|
||||
}
|
||||
return false
|
||||
} catch {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// Shown next to the progress bar when the fake increment has maxed out
|
||||
// at 95% but the real RPC hasn't returned yet — lets the user know the
|
||||
// UI hasn't frozen while SHA verification and disk writes finish.
|
||||
@@ -486,14 +509,12 @@ async function downloadUpdate() {
|
||||
downloadPercent.value = 0
|
||||
statusMessage.value = ''
|
||||
|
||||
// Simulate incremental progress while waiting for the RPC. Capped at
|
||||
// 95% so the bar never shows >100% before the real completion jumps it
|
||||
// to 100 — previously the random increment could overshoot.
|
||||
const progressInterval = setInterval(() => {
|
||||
if (downloadPercent.value < 95) {
|
||||
downloadPercent.value = Math.min(95, downloadPercent.value + Math.random() * 3)
|
||||
}
|
||||
}, 500)
|
||||
// Poll the backend's real byte counter every second instead of
|
||||
// faking progress. The backend exposes bytes_downloaded/total_bytes
|
||||
// via update.status, updated per chunk. This also means the bar
|
||||
// resumes correctly after navigating away and back — no more
|
||||
// "95% for some time" mystery.
|
||||
const progressInterval = setInterval(() => { void pollDownloadProgress() }, 1000)
|
||||
|
||||
try {
|
||||
const res = await rpcClient.call<{
|
||||
@@ -616,8 +637,24 @@ async function setSchedule(value: ScheduleValue) {
|
||||
}
|
||||
}
|
||||
|
||||
onMounted(() => {
|
||||
Promise.all([loadStatus(), loadSchedule(), checkForUpdates()])
|
||||
onMounted(async () => {
|
||||
await Promise.all([loadStatus(), loadSchedule(), checkForUpdates()])
|
||||
// If a download was already running when the user navigated here
|
||||
// (or refreshed), pick up the progress bar where it is and keep
|
||||
// polling until the backend reports done. No RPC call to start the
|
||||
// download — the backend's already running it.
|
||||
const active = await pollDownloadProgress()
|
||||
if (active) {
|
||||
downloading.value = true
|
||||
const resumeInterval = setInterval(async () => {
|
||||
const stillActive = await pollDownloadProgress()
|
||||
if (!stillActive) {
|
||||
clearInterval(resumeInterval)
|
||||
downloading.value = false
|
||||
downloaded.value = true
|
||||
}
|
||||
}, 1000)
|
||||
}
|
||||
})
|
||||
</script>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user