fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy

UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry

Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)

Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0

Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -389,6 +389,15 @@ pub(super) fn get_data_dirs_for_app(package_id: &str) -> Vec<String> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Read a secret from `/var/lib/archipelago/secrets/{name}`.
///
/// Leading and trailing whitespace (including a trailing newline) is trimmed
/// from the file contents before they are returned.
///
/// Falls back to `default` when the file cannot be read, and also when the
/// file is empty or contains only whitespace, so that a blank secret file is
/// never turned into an empty password.
fn read_secret(name: &str, default: &str) -> String {
    let path = format!("/var/lib/archipelago/secrets/{}", name);
    std::fs::read_to_string(&path)
        .ok()
        .map(|contents| contents.trim().to_string())
        // A blank file is treated the same as a missing one.
        .filter(|secret| !secret.is_empty())
        .unwrap_or_else(|| default.to_string())
}
|
||||
|
||||
/// Get app-specific configuration
|
||||
/// Returns: (ports, volumes, env_vars, custom_command, custom_args)
|
||||
pub(super) async fn get_app_config(
|
||||
@@ -413,7 +422,12 @@ pub(super) async fn get_app_config(
|
||||
None,
|
||||
),
|
||||
"bitcoin" | "bitcoin-core" | "bitcoin-knots" => (
|
||||
vec!["8332:8332".to_string(), "8333:8333".to_string()],
|
||||
vec![
|
||||
"8332:8332".to_string(),
|
||||
"8333:8333".to_string(),
|
||||
"28332:28332".to_string(),
|
||||
"28333:28333".to_string(),
|
||||
],
|
||||
vec!["/var/lib/archipelago/bitcoin:/home/bitcoin/.bitcoin".to_string()],
|
||||
vec![],
|
||||
None,
|
||||
@@ -453,7 +467,8 @@ pub(super) async fn get_app_config(
|
||||
format!("BTCPAY_BTCRPCURL=http://{}:8332", host_ip),
|
||||
format!("BTCPAY_BTCRPCUSER={}", rpc_user),
|
||||
format!("BTCPAY_BTCRPCPASSWORD={}", rpc_pass),
|
||||
"BTCPAY_POSTGRES=User ID=btcpay;Password=btcpaypass;Host=archy-btcpay-db;Port=5432;Database=btcpay;Include Error Detail=true".to_string(),
|
||||
format!("BTCPAY_POSTGRES=User ID=btcpay;Password={};Host=archy-btcpay-db;Port=5432;Database=btcpay;Include Error Detail=true",
|
||||
read_secret("btcpay-db-password", "btcpaypass")),
|
||||
],
|
||||
None,
|
||||
None,
|
||||
@@ -481,7 +496,7 @@ pub(super) async fn get_app_config(
|
||||
"DATABASE_HOST=archy-mempool-db".to_string(),
|
||||
"DATABASE_DATABASE=mempool".to_string(),
|
||||
"DATABASE_USERNAME=mempool".to_string(),
|
||||
"DATABASE_PASSWORD=mempoolpass".to_string(),
|
||||
format!("DATABASE_PASSWORD={}", read_secret("mempool-db-password", "mempoolpass")),
|
||||
],
|
||||
None,
|
||||
None,
|
||||
@@ -511,8 +526,8 @@ pub(super) async fn get_app_config(
|
||||
vec![
|
||||
"MYSQL_DATABASE=mempool".to_string(),
|
||||
"MYSQL_USER=mempool".to_string(),
|
||||
"MYSQL_PASSWORD=mempoolpass".to_string(),
|
||||
"MYSQL_ROOT_PASSWORD=rootpass".to_string(),
|
||||
format!("MYSQL_PASSWORD={}", read_secret("mempool-db-password", "mempoolpass")),
|
||||
format!("MYSQL_ROOT_PASSWORD={}", read_secret("mempool-db-root-password", "rootpass")),
|
||||
],
|
||||
None,
|
||||
None,
|
||||
@@ -607,7 +622,7 @@ pub(super) async fn get_app_config(
|
||||
vec![
|
||||
"DB_HOSTNAME=immich_postgres".to_string(),
|
||||
"DB_USERNAME=postgres".to_string(),
|
||||
"DB_PASSWORD=immichpass".to_string(),
|
||||
format!("DB_PASSWORD={}", read_secret("immich-db-password", "immichpass")),
|
||||
"DB_DATABASE_NAME=immich".to_string(),
|
||||
"REDIS_HOSTNAME=immich_redis".to_string(),
|
||||
"UPLOAD_LOCATION=/usr/src/app/upload".to_string(),
|
||||
|
||||
@@ -256,8 +256,9 @@ impl RpcHandler {
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
// Post-start health verification: wait up to 30s for container to be running
|
||||
for i in 0..6u32 {
|
||||
// Post-start health verification: wait up to 60s for container to be running
|
||||
let mut container_running = false;
|
||||
for i in 0..12u32 {
|
||||
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||
let status = tokio::process::Command::new("podman")
|
||||
.args(["inspect", container_name, "--format", "{{.State.Status}}"])
|
||||
@@ -266,6 +267,7 @@ impl RpcHandler {
|
||||
if let Ok(o) = status {
|
||||
let state = String::from_utf8_lossy(&o.stdout).trim().to_string();
|
||||
if state == "running" {
|
||||
container_running = true;
|
||||
break;
|
||||
}
|
||||
if state == "exited" {
|
||||
@@ -288,12 +290,19 @@ impl RpcHandler {
|
||||
));
|
||||
}
|
||||
}
|
||||
if i == 5 {
|
||||
debug!("Container {} health check timeout (30s) — continuing anyway", container_name);
|
||||
if i == 11 {
|
||||
warn!("Container {} not running after 60s — install may have failed", container_name);
|
||||
}
|
||||
}
|
||||
|
||||
// Post-install hooks
|
||||
if !container_running {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Container {} did not reach running state within 60s. Check logs with: podman logs {}",
|
||||
container_name, container_name
|
||||
));
|
||||
}
|
||||
|
||||
// Post-install hooks — await completion before returning success
|
||||
self.run_post_install_hooks(package_id).await;
|
||||
|
||||
Ok(serde_json::json!({
|
||||
@@ -536,98 +545,106 @@ printtoconsole=1\n",
|
||||
}
|
||||
|
||||
/// Run post-install hooks (Nextcloud trusted domains, Bitcoin UI container).
|
||||
/// Critical hooks (credential setup, config) are awaited; UI container builds are background.
|
||||
async fn run_post_install_hooks(&self, package_id: &str) {
|
||||
if package_id == "filebrowser" {
|
||||
tokio::spawn(async move {
|
||||
// Wait for filebrowser to start and initialize its database
|
||||
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||
// Wait for filebrowser to start and initialize its database
|
||||
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||
|
||||
// Generate a random password (32 bytes, hex-encoded)
|
||||
let mut buf = [0u8; 32];
|
||||
rand::RngCore::fill_bytes(&mut rand::rngs::OsRng, &mut buf);
|
||||
let password = hex::encode(buf);
|
||||
// Generate a random password (32 bytes, hex-encoded)
|
||||
let mut buf = [0u8; 32];
|
||||
rand::RngCore::fill_bytes(&mut rand::rngs::OsRng, &mut buf);
|
||||
let password = hex::encode(buf);
|
||||
|
||||
// Get a JWT token with default credentials
|
||||
let login_res = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(10))
|
||||
.build()
|
||||
.unwrap_or_default()
|
||||
.post("http://127.0.0.1:8083/api/login")
|
||||
.json(&serde_json::json!({"username": "admin", "password": "admin"}))
|
||||
.send()
|
||||
.await;
|
||||
// Get a JWT token with default credentials
|
||||
let client = match reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(10))
|
||||
.build()
|
||||
{
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to create HTTP client for FileBrowser hook: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let token = match login_res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
resp.text().await.unwrap_or_default().trim_matches('"').to_string()
|
||||
}
|
||||
_ => {
|
||||
tracing::warn!("FileBrowser not ready for password change — keeping default");
|
||||
return;
|
||||
}
|
||||
};
|
||||
let login_res = client
|
||||
.post("http://127.0.0.1:8083/api/login")
|
||||
.json(&serde_json::json!({"username": "admin", "password": "admin"}))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
// Change admin password via filebrowser API
|
||||
let change_res = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(10))
|
||||
.build()
|
||||
.unwrap_or_default()
|
||||
.put("http://127.0.0.1:8083/api/users/1")
|
||||
.header("X-Auth", &token)
|
||||
.json(&serde_json::json!({"password": password}))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
match change_res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let secret_dir = "/var/lib/archipelago/secrets/filebrowser";
|
||||
let _ = tokio::fs::create_dir_all(secret_dir).await;
|
||||
let _ = tokio::fs::write(
|
||||
format!("{}/password", secret_dir),
|
||||
&password,
|
||||
).await;
|
||||
info!("FileBrowser admin password secured (default credentials replaced)");
|
||||
}
|
||||
Ok(resp) => {
|
||||
tracing::warn!("FileBrowser password change failed: {}", resp.status());
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("FileBrowser password change error: {}", e);
|
||||
let token = match login_res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
match resp.text().await {
|
||||
Ok(t) => t.trim_matches('"').to_string(),
|
||||
Err(e) => {
|
||||
tracing::warn!("FileBrowser login response parse failed: {}", e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
_ => {
|
||||
tracing::warn!("FileBrowser not ready for password change — keeping default");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Change admin password via filebrowser API
|
||||
let change_res = client
|
||||
.put("http://127.0.0.1:8083/api/users/1")
|
||||
.header("X-Auth", &token)
|
||||
.json(&serde_json::json!({"password": password}))
|
||||
.send()
|
||||
.await;
|
||||
|
||||
match change_res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let secret_dir = "/var/lib/archipelago/secrets/filebrowser";
|
||||
let _ = tokio::fs::create_dir_all(secret_dir).await;
|
||||
let _ = tokio::fs::write(
|
||||
format!("{}/password", secret_dir),
|
||||
&password,
|
||||
).await;
|
||||
info!("FileBrowser admin password secured (default credentials replaced)");
|
||||
}
|
||||
Ok(resp) => {
|
||||
tracing::warn!("FileBrowser password change failed: {}", resp.status());
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("FileBrowser password change error: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if package_id == "nextcloud" {
|
||||
let host_ip = self.config.host_ip.clone();
|
||||
tokio::spawn(async move {
|
||||
// Wait for Nextcloud to finish first-run initialization
|
||||
tokio::time::sleep(std::time::Duration::from_secs(30)).await;
|
||||
for domain_idx in 1..=2u8 {
|
||||
let value = if domain_idx == 1 {
|
||||
host_ip.as_str()
|
||||
} else {
|
||||
"localhost"
|
||||
};
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args([
|
||||
"exec",
|
||||
"-u",
|
||||
"33",
|
||||
"nextcloud",
|
||||
"php",
|
||||
"occ",
|
||||
"config:system:set",
|
||||
"trusted_domains",
|
||||
&domain_idx.to_string(),
|
||||
"--value",
|
||||
value,
|
||||
])
|
||||
.output()
|
||||
.await;
|
||||
}
|
||||
info!("Nextcloud trusted domains configured for {}", host_ip);
|
||||
});
|
||||
let host_ip = &self.config.host_ip;
|
||||
// Wait for Nextcloud to finish first-run initialization
|
||||
tokio::time::sleep(std::time::Duration::from_secs(30)).await;
|
||||
for domain_idx in 1..=2u8 {
|
||||
let value = if domain_idx == 1 {
|
||||
host_ip.as_str()
|
||||
} else {
|
||||
"localhost"
|
||||
};
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args([
|
||||
"exec",
|
||||
"-u",
|
||||
"33",
|
||||
"nextcloud",
|
||||
"php",
|
||||
"occ",
|
||||
"config:system:set",
|
||||
"trusted_domains",
|
||||
&domain_idx.to_string(),
|
||||
"--value",
|
||||
value,
|
||||
])
|
||||
.output()
|
||||
.await;
|
||||
}
|
||||
info!("Nextcloud trusted domains configured for {}", host_ip);
|
||||
}
|
||||
|
||||
// Build and start companion UI containers for headless services
|
||||
|
||||
@@ -58,6 +58,7 @@ fn create_installing_entry(package_id: &str) -> PackageDataEntry {
|
||||
PackageDataEntry {
|
||||
state: PackageState::Installing,
|
||||
health: None,
|
||||
exit_code: None,
|
||||
static_files: StaticFiles {
|
||||
license: String::new(),
|
||||
instructions: String::new(),
|
||||
|
||||
@@ -221,18 +221,30 @@ impl RpcHandler {
|
||||
}
|
||||
}
|
||||
|
||||
// Remove container (without -f to respect graceful shutdown above)
|
||||
tracing::info!("Uninstall {}: removing container {}", package_id, name);
|
||||
let rm_out = tokio::process::Command::new("podman")
|
||||
.args(["rm", "-f", name])
|
||||
.args(["rm", name])
|
||||
.output()
|
||||
.await;
|
||||
match rm_out {
|
||||
Ok(o) if o.status.success() => removed += 1,
|
||||
Ok(o) => {
|
||||
// If normal rm fails (e.g., still running), force as fallback
|
||||
let stderr = String::from_utf8_lossy(&o.stderr);
|
||||
let msg = format!("Failed to remove {}: {}", name, stderr.trim());
|
||||
tracing::error!("Uninstall {}: {}", package_id, msg);
|
||||
errors.push(msg);
|
||||
tracing::warn!("Uninstall {}: rm {} failed ({}), trying force", package_id, name, stderr.trim());
|
||||
let force_rm = tokio::process::Command::new("podman")
|
||||
.args(["rm", "-f", name])
|
||||
.output()
|
||||
.await;
|
||||
match force_rm {
|
||||
Ok(o2) if o2.status.success() => removed += 1,
|
||||
_ => {
|
||||
let msg = format!("Failed to remove {}: {}", name, stderr.trim());
|
||||
tracing::error!("Uninstall {}: {}", package_id, msg);
|
||||
errors.push(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
let msg = format!("Failed to remove {}: {}", name, e);
|
||||
@@ -242,6 +254,26 @@ impl RpcHandler {
|
||||
}
|
||||
}
|
||||
|
||||
// Prune every volume not referenced by any container (host-wide; not limited to this package's volumes)
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["volume", "prune", "-f"])
|
||||
.output()
|
||||
.await;
|
||||
|
||||
// Clean up app-specific networks (only if no other containers use them)
|
||||
let app_networks: Vec<&str> = match package_id {
|
||||
"immich" | "immich_server" => vec!["immich-net"],
|
||||
"penpot" | "penpot-frontend" => vec!["penpot-net"],
|
||||
"indeedhub" | "indeedhub-api" => vec!["indeedhub-net"],
|
||||
_ => vec![],
|
||||
};
|
||||
for net in &app_networks {
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["network", "rm", net])
|
||||
.output()
|
||||
.await;
|
||||
}
|
||||
|
||||
// Release port allocation
|
||||
{
|
||||
let mut allocator = self.port_allocator.lock().await;
|
||||
@@ -257,10 +289,19 @@ impl RpcHandler {
|
||||
.args(["rm", "-rf", dir])
|
||||
.output()
|
||||
.await;
|
||||
if let Ok(o) = rm_out {
|
||||
if !o.status.success() {
|
||||
tracing::warn!("Uninstall {}: rm {} failed", package_id, dir);
|
||||
match rm_out {
|
||||
Ok(o) if !o.status.success() => {
|
||||
let stderr = String::from_utf8_lossy(&o.stderr);
|
||||
let msg = format!("Failed to remove data {}: {}", dir, stderr.trim());
|
||||
tracing::error!("Uninstall {}: {}", package_id, msg);
|
||||
errors.push(msg);
|
||||
}
|
||||
Err(e) => {
|
||||
let msg = format!("Failed to remove data {}: {}", dir, e);
|
||||
tracing::error!("Uninstall {}: {}", package_id, msg);
|
||||
errors.push(msg);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -271,20 +312,24 @@ impl RpcHandler {
|
||||
package_id,
|
||||
errors
|
||||
);
|
||||
} else {
|
||||
tracing::info!(
|
||||
"Uninstall {} complete: stopped={}, removed={}",
|
||||
return Err(anyhow::anyhow!(
|
||||
"Uninstall {} partially failed: {}",
|
||||
package_id,
|
||||
stopped,
|
||||
removed
|
||||
);
|
||||
errors.join("; ")
|
||||
));
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Uninstall {} complete: stopped={}, removed={}",
|
||||
package_id,
|
||||
stopped,
|
||||
removed
|
||||
);
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"status": if errors.is_empty() { "uninstalled" } else { "partial" },
|
||||
"status": "uninstalled",
|
||||
"stopped": stopped,
|
||||
"removed": removed,
|
||||
"errors": errors,
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user