fix: bulletproof first-boot container creation and install reliability

Remove the Bitcoin RPC 60-second gate that blocked 13+ dependent containers
(mempool, electrumx, btcpay, lnd, fedimint) from being created on first boot.
Containers are now always created, and the health monitor auto-restarts them
once Bitcoin becomes responsive — the designed recovery path.

Additional hardening:
- Validate archy-net creation with retry (silent failure broke DNS)
- Verify critical images are loaded, re-load from tarballs if missing
- Create SearXNG settings.yml before container start (was missing)
- Run reconciler automatically after first-boot failures
- Add load-images as an explicit systemd dependency with a 900s timeout
- Propagate config write errors in install.rs (bitcoin.conf, lnd.conf)
- FileBrowser password change: add a retry loop (6 attempts) and set 0o600 permissions on the stored password file
- Post-start verification: detect containers that exit immediately
- Add a 2s delay between consecutive container starts so dependencies can initialize

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dorian
2026-03-31 18:31:00 +01:00
parent a896ecd431
commit 08f7f58a9d
5 changed files with 239 additions and 68 deletions

View File

@@ -222,12 +222,12 @@ impl RpcHandler {
// Pre-install: bitcoin.conf with rpcauth
if matches!(package_id, "bitcoin" | "bitcoin-core" | "bitcoin-knots") {
self.write_bitcoin_conf(&rpc_user, &rpc_pass).await;
self.write_bitcoin_conf(&rpc_user, &rpc_pass).await?;
}
// Pre-install: lnd.conf with Bitcoin RPC credentials
if package_id == "lnd" {
self.write_lnd_conf(&rpc_user, &rpc_pass).await;
self.write_lnd_conf(&rpc_user, &rpc_pass).await?;
}
// Pre-install: SearXNG settings.yml (required or container exits immediately)
@@ -241,7 +241,12 @@ impl RpcHandler {
"use_default_settings: true\ngeneral:\n instance_name: Archipelago Search\nserver:\n secret_key: \"{}\"\n bind_address: \"0.0.0.0\"\n port: 8080\n limiter: false\nui:\n default_theme: simple\n",
secret_hex
);
let _ = tokio::fs::write(&settings_path, settings).await;
tokio::fs::create_dir_all(searx_dir)
.await
.context("Failed to create SearXNG config directory")?;
tokio::fs::write(&settings_path, settings)
.await
.context("Failed to write SearXNG settings.yml")?;
info!("Created SearXNG settings.yml");
}
}
@@ -580,7 +585,7 @@ impl RpcHandler {
}
/// Write bitcoin.conf with rpcauth (salted HMAC hash, no plaintext password).
async fn write_bitcoin_conf(&self, rpc_user: &str, rpc_pass: &str) {
async fn write_bitcoin_conf(&self, rpc_user: &str, rpc_pass: &str) -> Result<()> {
let bitcoin_dir = "/var/lib/archipelago/bitcoin";
let conf_path = format!("{}/bitcoin.conf", bitcoin_dir);
@@ -607,20 +612,25 @@ listen=1\n\
printtoconsole=1\n",
rpcauth_line
);
let _ = tokio::fs::create_dir_all(bitcoin_dir).await;
let _ = tokio::fs::write(&conf_path, bitcoin_conf).await;
tokio::fs::create_dir_all(bitcoin_dir)
.await
.context("Failed to create bitcoin data directory")?;
tokio::fs::write(&conf_path, bitcoin_conf)
.await
.context("Failed to write bitcoin.conf")?;
info!("Created bitcoin.conf with rpcauth (no plaintext credentials)");
Ok(())
}
/// Write LND config file with Bitcoin RPC credentials.
async fn write_lnd_conf(&self, rpc_user: &str, rpc_pass: &str) {
async fn write_lnd_conf(&self, rpc_user: &str, rpc_pass: &str) -> Result<()> {
let lnd_dir = "/var/lib/archipelago/lnd";
let conf_path = format!("{}/lnd.conf", lnd_dir);
// Don't overwrite existing config (user may have customized it)
if tokio::fs::try_exists(&conf_path).await.unwrap_or(false) {
info!("lnd.conf already exists, skipping write");
return;
return Ok(());
}
let lnd_conf = format!(
@@ -648,24 +658,25 @@ autopilot.active=false\n",
user = rpc_user,
pass = rpc_pass,
);
let _ = tokio::fs::create_dir_all(lnd_dir).await;
let _ = tokio::fs::write(&conf_path, lnd_conf).await;
tokio::fs::create_dir_all(lnd_dir)
.await
.context("Failed to create LND data directory")?;
tokio::fs::write(&conf_path, lnd_conf)
.await
.context("Failed to write lnd.conf")?;
info!("Created lnd.conf with Bitcoin RPC credentials");
Ok(())
}
/// Run post-install hooks (Nextcloud trusted domains, Bitcoin UI container).
/// Critical hooks (credential setup, config) are awaited; UI container builds are background.
async fn run_post_install_hooks(&self, package_id: &str) {
if package_id == "filebrowser" {
// Wait for filebrowser to start and initialize its database
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
// Generate a random password (32 bytes, hex-encoded)
let mut buf = [0u8; 32];
rand::RngCore::fill_bytes(&mut rand::rngs::OsRng, &mut buf);
let password = hex::encode(buf);
// Get a JWT token with default credentials
let client = match reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(10))
.build()
@@ -677,53 +688,72 @@ autopilot.active=false\n",
}
};
let login_res = client
.post("http://127.0.0.1:8083/api/login")
.json(&serde_json::json!({"username": "admin", "password": "admin"}))
.send()
.await;
// Retry loop: FileBrowser may take time to initialize its SQLite database
let mut password_changed = false;
for attempt in 0..6u32 {
let delay = if attempt == 0 { 5 } else { 10 };
tokio::time::sleep(std::time::Duration::from_secs(delay)).await;
let token = match login_res {
Ok(resp) if resp.status().is_success() => {
match resp.text().await {
Ok(t) => t.trim_matches('"').to_string(),
Err(e) => {
tracing::warn!("FileBrowser login response parse failed: {}", e);
return;
// Try to log in with default credentials
let login_res = client
.post("http://127.0.0.1:8083/api/login")
.json(&serde_json::json!({"username": "admin", "password": "admin"}))
.send()
.await;
let token = match login_res {
Ok(resp) if resp.status().is_success() => {
match resp.text().await {
Ok(t) => t.trim_matches('"').to_string(),
Err(_) => continue,
}
}
}
_ => {
tracing::warn!("FileBrowser not ready for password change — keeping default");
return;
}
};
_ => {
debug!("FileBrowser not ready (attempt {}/6)", attempt + 1);
continue;
}
};
// Change admin password via filebrowser API
let change_res = client
.put("http://127.0.0.1:8083/api/users/1")
.header("X-Auth", &token)
.json(&serde_json::json!({"password": password}))
.send()
.await;
// Change admin password
let change_res = client
.put("http://127.0.0.1:8083/api/users/1")
.header("X-Auth", &token)
.json(&serde_json::json!({"password": password}))
.send()
.await;
match change_res {
Ok(resp) if resp.status().is_success() => {
let secret_dir = "/var/lib/archipelago/secrets/filebrowser";
let _ = tokio::fs::create_dir_all(secret_dir).await;
let _ = tokio::fs::write(
format!("{}/password", secret_dir),
&password,
).await;
info!("FileBrowser admin password secured (default credentials replaced)");
}
Ok(resp) => {
tracing::warn!("FileBrowser password change failed: {}", resp.status());
}
Err(e) => {
tracing::warn!("FileBrowser password change error: {}", e);
match change_res {
Ok(resp) if resp.status().is_success() => {
let secret_dir = "/var/lib/archipelago/secrets/filebrowser";
if let Err(e) = tokio::fs::create_dir_all(secret_dir).await {
tracing::warn!("Failed to create filebrowser secrets dir: {}", e);
}
let pw_path = format!("{}/password", secret_dir);
if let Err(e) = tokio::fs::write(&pw_path, &password).await {
tracing::warn!("Failed to write filebrowser password: {}", e);
}
// Set restrictive permissions on the password file
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let _ = std::fs::set_permissions(
&pw_path,
std::fs::Permissions::from_mode(0o600),
);
}
info!("FileBrowser admin password secured (default credentials replaced)");
password_changed = true;
break;
}
_ => continue,
}
}
if !password_changed {
tracing::warn!(
"FileBrowser password could not be changed after 6 attempts — \
default credentials (admin/admin) remain active"
);
}
}
if package_id == "nextcloud" {

View File

@@ -48,7 +48,11 @@ impl RpcHandler {
install_log(&format!("START: {} (containers: {:?})", package_id, to_start)).await;
let mut errors = Vec::new();
for name in &to_start {
for (i, name) in to_start.iter().enumerate() {
// Brief delay between dependent containers to allow initialization
if i > 0 {
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
}
tracing::info!("Starting container: {}", name);
let out = tokio::process::Command::new("podman")
.args(["start", name])
@@ -66,6 +70,45 @@ impl RpcHandler {
if !errors.is_empty() {
return Err(anyhow::anyhow!("Start failed: {}", errors.join("; ")));
}
// Verify containers actually reached running state (podman start can
// succeed even if the container exits immediately after)
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
for name in &to_start {
let status = tokio::process::Command::new("podman")
.args(["inspect", name, "--format", "{{.State.Status}}"])
.output()
.await;
if let Ok(o) = status {
let state = String::from_utf8_lossy(&o.stdout).trim().to_string();
if state == "exited" {
let logs = tokio::process::Command::new("podman")
.args(["logs", "--tail", "5", name])
.output()
.await;
let log_text = logs
.map(|o| {
let combined = format!(
"{}{}",
String::from_utf8_lossy(&o.stdout),
String::from_utf8_lossy(&o.stderr)
);
combined.chars().take(200).collect::<String>()
})
.unwrap_or_default();
tracing::error!("Container {} exited after start: {}", name, log_text);
install_log(&format!("START EXITED: {}{}", name, log_text)).await;
errors.push(format!("{}: exited after start", name));
}
}
}
if !errors.is_empty() {
return Err(anyhow::anyhow!(
"Containers exited after start: {}",
errors.join("; ")
));
}
Ok(serde_json::Value::Null)
}