fix: container orchestration stability, AIUI inclusion, lnd-ui port, version 1.3.0
Some checks failed
Build Archipelago ISO (dev) / build-iso (push) Failing after 6m0s
Build Archipelago ISO / build-iso (push) Failing after 41m40s

Container stability:
- Merge scan results instead of full replacement (prevents UI flapping)
- Absence threshold: 3 consecutive missed scans before removing from state
- container-list RPC uses cached scanner state for consistency
- Increased Podman API timeout 30s → 60s (scanner + health monitor)
- Keep crashed containers visible as "exited" instead of deleting them with podman rm -f
- Resolve host-gateway IP via ip route (podman 4.3.x compatibility)

ISO build fixes:
- AIUI web app inclusion: searches 5 paths + CI step to copy from build server
- Claude API proxy: systemctl enable with symlink fallback
- AIUI nginx: try_files =404 (was /aiui/index.html redirect loop)
- Build version set to 1.3.0

Container fixes:
- lnd-ui: nginx listens on 8080 (was 80, which failed with "Permission denied" in rootless mode)
- first-boot: image-versions.sh sourced from correct path with validation
- first-boot: host-gateway resolved to actual gateway IP

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dorian
2026-04-02 01:28:11 +01:00
parent 9d4fb805f5
commit ee7b5980dd
13 changed files with 206 additions and 71 deletions

View File

@@ -131,7 +131,37 @@ impl RpcHandler {
}
pub(super) async fn handle_container_list(&self) -> Result<serde_json::Value> {
// Try to get containers from orchestrator first
// Use the scanner's cached state for consistency with WebSocket updates.
// This prevents the container-list RPC from returning different results
// than the WebSocket-delivered package_data, which caused apps to flicker
// between "installed" and "not-installed" in the UI.
let (data, _) = self.state_manager.get_snapshot().await;
if data.server_info.status_info.containers_scanned && !data.package_data.is_empty() {
let containers: Vec<serde_json::Value> = data.package_data.iter().map(|(id, pkg)| {
let state = match &pkg.state {
crate::data_model::PackageState::Running => "running",
crate::data_model::PackageState::Stopped => "stopped",
crate::data_model::PackageState::Exited => "exited",
crate::data_model::PackageState::Starting => "created",
_ => "unknown",
};
let lan = pkg.installed.as_ref()
.and_then(|i| i.interface_addresses.get("main"))
.and_then(|a| a.lan_address.as_deref());
serde_json::json!({
"id": id,
"name": id,
"state": state,
"image": "",
"created": "",
"ports": [],
"lan_address": lan,
})
}).collect();
return Ok(serde_json::json!(containers));
}
// Fallback: scanner hasn't run yet, query podman directly
if let Some(orchestrator) = &self.orchestrator {
if let Ok(containers) = orchestrator.list_containers().await {
if !containers.is_empty() {
@@ -140,7 +170,6 @@ impl RpcHandler {
}
}
// Fallback: list containers directly via podman (for bundled apps)
let output = tokio::process::Command::new("podman")
.args(["ps", "-a", "--format", "json"])
.output()
@@ -156,11 +185,9 @@ impl RpcHandler {
return Ok(serde_json::json!([]));
}
// Parse podman JSON output
let podman_containers: Vec<serde_json::Value> = serde_json::from_str(&stdout)
.unwrap_or_else(|_| Vec::new());
// Convert to our ContainerStatus format
let containers: Vec<serde_json::Value> = podman_containers
.iter()
.map(|c| {
@@ -173,42 +200,7 @@ impl RpcHandler {
"paused" => "paused",
_ => "unknown",
};
let name = c.get("Names").and_then(|v| v.as_array()).and_then(|a| a.first()).and_then(|v| v.as_str()).unwrap_or("");
// Map container name to its UI port (lan_address)
let lan_address = match name {
"bitcoin-knots" | "bitcoin-ui" => Some("http://localhost:8334"),
"lnd" | "archy-lnd-ui" => Some("http://localhost:8081"),
"tailscale" => Some("http://localhost:8240"),
"homeassistant" => Some("http://localhost:8123"),
"archy-mempool-web" | "mempool" => Some("http://localhost:4080"),
"btcpay-server" => Some("http://localhost:23000"),
"grafana" => Some("http://localhost:3000"),
"searxng" => Some("http://localhost:8888"),
"ollama" => Some("http://localhost:11434"),
"onlyoffice" => Some("http://localhost:9980"),
"penpot" => Some("http://localhost:9001"),
"nextcloud" => Some("http://localhost:8085"),
"vaultwarden" => Some("http://localhost:8082"),
"jellyfin" => Some("http://localhost:8096"),
"photoprism" => Some("http://localhost:2342"),
"immich_server" | "immich" => Some("http://localhost:2283"),
"filebrowser" => Some("http://localhost:8083"),
"nginx-proxy-manager" => Some("http://localhost:81"),
"portainer" => Some("http://localhost:9000"),
"uptime-kuma" => Some("http://localhost:3001"),
"fedimint" => Some("http://localhost:8175"),
"fedimint-gateway" => Some("http://localhost:8176"),
"nostr-rs-relay" => Some("http://localhost:18081"),
"indeedhub" => Some("http://localhost:7777"),
"dwn" => Some("http://localhost:3100"),
"endurain" => Some("http://localhost:8080"),
"electrs" | "archy-electrs-ui" => Some("http://localhost:50002"),
_ => None,
};
// Parse ports from podman JSON (field is "host_port" in snake_case)
let ports: Vec<String> = c.get("Ports")
.and_then(|v| v.as_array())
.map(|a| {
@@ -220,7 +212,6 @@ impl RpcHandler {
}).collect()
})
.unwrap_or_default();
serde_json::json!({
"id": c.get("Id").and_then(|v| v.as_str()).unwrap_or(""),
"name": name,
@@ -228,7 +219,7 @@ impl RpcHandler {
"image": c.get("Image").and_then(|v| v.as_str()).unwrap_or(""),
"created": c.get("Created").and_then(|v| v.as_str()).unwrap_or(""),
"ports": ports,
"lan_address": lan_address,
"lan_address": serde_json::Value::Null,
})
})
.collect();

View File

@@ -192,7 +192,9 @@ impl RpcHandler {
}
// DNS: ensure host.containers.internal resolves (needed for Tor proxy, inter-service calls)
run_args.push("--add-host=host.containers.internal:host-gateway");
// Rootless podman 4.3.x doesn't support "host-gateway" — resolve to actual gateway IP
let host_gateway_flag = resolve_host_gateway().await;
run_args.push(&host_gateway_flag);
// Security hardening (skip for privileged containers)
let security_caps: Vec<String> = if !is_tailscale {
@@ -340,6 +342,8 @@ impl RpcHandler {
}
if state == "exited" {
// Container crashed immediately — get logs for diagnosis
// Keep the container (don't rm) so it shows as "exited" in My Apps
// instead of vanishing completely. User can retry or remove manually.
let logs = tokio::process::Command::new("podman")
.args(["logs", "--tail", "20", container_name])
.output()
@@ -351,11 +355,7 @@ impl RpcHandler {
format!("{}{}", stdout, stderr)
})
.unwrap_or_default();
install_log(&format!("INSTALL CRASH: {} — container exited. Logs:\n{}", package_id, &log_output.chars().take(1000).collect::<String>())).await;
let _ = tokio::process::Command::new("podman")
.args(["rm", "-f", container_name])
.output()
.await;
install_log(&format!("INSTALL CRASH: {} — container exited (kept for visibility). Logs:\n{}", package_id, &log_output.chars().take(1000).collect::<String>())).await;
return Err(anyhow::anyhow!(
"Container {} exited immediately after start. Logs: {}",
container_name,
@@ -936,3 +936,39 @@ autopilot.active=false\n",
Ok(serde_json::json!({ "token": token }))
}
}
/// Resolve the host gateway IP for --add-host flag.
/// Podman 4.3.x (Debian 12) doesn't support "host-gateway" in rootless mode,
/// so we resolve the default gateway IP from the routing table.
async fn resolve_host_gateway() -> String {
// Try `ip route` to get the default gateway
if let Ok(output) = tokio::process::Command::new("ip")
.args(["route", "show", "default"])
.output()
.await
{
let stdout = String::from_utf8_lossy(&output.stdout);
for line in stdout.lines() {
if line.starts_with("default") {
if let Some(gw) = line.split_whitespace().nth(2) {
if !gw.is_empty() {
return format!("--add-host=host.containers.internal:{}", gw);
}
}
}
}
}
// Fallback: try hostname -I (first IP)
if let Ok(output) = tokio::process::Command::new("hostname")
.args(["-I"])
.output()
.await
{
let stdout = String::from_utf8_lossy(&output.stdout);
if let Some(ip) = stdout.split_whitespace().next() {
return format!("--add-host=host.containers.internal:{}", ip);
}
}
// Last resort
"--add-host=host.containers.internal:10.0.2.2".to_string()
}

View File

@@ -362,7 +362,7 @@ fn parse_memory_string(s: &str) -> Option<u64> {
/// Query all containers and their health status.
async fn check_containers() -> Vec<ContainerHealth> {
let output = match tokio::time::timeout(
std::time::Duration::from_secs(30),
std::time::Duration::from_secs(60),
tokio::process::Command::new("podman")
.args(["ps", "-a", "--format", "json"])
.output(),
@@ -375,7 +375,7 @@ async fn check_containers() -> Vec<ContainerHealth> {
return Vec::new();
}
Err(_) => {
debug!("podman ps timed out (30s)");
debug!("podman ps timed out (60s)");
return Vec::new();
}
_ => return Vec::new(),

View File

@@ -11,6 +11,7 @@ use crate::state::StateManager;
use anyhow::Result;
use hyper::server::conn::Http;
use hyper::service::service_fn;
use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
@@ -269,7 +270,10 @@ impl Server {
// Brief delay for containers to stabilize after boot
tokio::time::sleep(Duration::from_secs(3)).await;
info!("🐳 Scanning containers...");
if let Err(e) = scan_and_update_packages(&scanner, &state, identity_clone.as_ref()).await {
// Tracks how many consecutive scans each container has been absent from.
// Prevents UI flapping when podman intermittently returns incomplete results.
let mut absence_tracker: HashMap<String, u32> = HashMap::new();
if let Err(e) = scan_and_update_packages(&scanner, &state, identity_clone.as_ref(), &mut absence_tracker).await {
error!("Failed to scan containers: {}", e);
}
@@ -284,7 +288,7 @@ impl Server {
continue;
}
scanning.store(true, std::sync::atomic::Ordering::Relaxed);
if let Err(e) = scan_and_update_packages(&scanner, &state, identity_clone.as_ref()).await {
if let Err(e) = scan_and_update_packages(&scanner, &state, identity_clone.as_ref(), &mut absence_tracker).await {
error!("Failed to update containers: {}", e);
}
scanning.store(false, std::sync::atomic::Ordering::Relaxed);
@@ -458,15 +462,19 @@ async fn refresh_tor_address(state: &StateManager, identity: &NodeIdentity) -> R
Ok(())
}
/// Number of consecutive scans a container must be absent from before it is
/// removed from state. The per-container counter is compared with `>=`, so
/// removal happens on the third consecutive miss.
/// 3 scans × 30s = 90 seconds of absence before removal
/// (the 30s scan interval is defined elsewhere — keep this note in sync with it).
const CONTAINER_ABSENCE_THRESHOLD: u32 = 3;
async fn scan_and_update_packages(
scanner: &DockerPackageScanner,
state: &StateManager,
identity: &NodeIdentity,
absence_tracker: &mut HashMap<String, u32>,
) -> Result<()> {
let packages = scanner.scan_containers().await?;
let (current_data, _) = state.get_snapshot().await;
let packages_changed = !packages.is_empty() && current_data.package_data != packages;
let tor_addr = docker_packages::read_tor_address("archipelago").await;
let tor_changed = tor_addr != current_data.server_info.tor_address;
let first_scan = !current_data.server_info.status_info.containers_scanned;
@@ -478,17 +486,58 @@ async fn scan_and_update_packages(
.unwrap_or(false);
let update_changed = update_available != current_data.server_info.status_info.updated;
if packages_changed || tor_changed || first_scan || update_changed {
let mut data = current_data;
if !packages.is_empty() {
data.package_data = packages;
// Empty scan result = podman failure or timeout, preserve existing state
if packages.is_empty() && !first_scan {
if tor_changed || update_changed {
let mut data = current_data;
data.server_info.tor_address = tor_addr.clone();
data.server_info.node_address = tor_addr.as_ref().map(|t| identity.node_address(t));
data.server_info.status_info.updated = update_available;
state.update_data(data).await;
}
return Ok(());
}
// Merge scan results with current state instead of full replacement.
// This prevents containers from vanishing when podman intermittently
// returns incomplete results under heavy load.
let mut merged = current_data.package_data.clone();
let mut changed = false;
// Update/add containers found in this scan
for (id, pkg) in &packages {
absence_tracker.remove(id);
if merged.get(id) != Some(pkg) {
merged.insert(id.clone(), pkg.clone());
changed = true;
}
}
// Track containers in state but missing from this scan.
// Only remove after CONTAINER_ABSENCE_THRESHOLD consecutive absent scans.
let current_ids: Vec<String> = merged.keys().cloned().collect();
for id in current_ids {
if !packages.contains_key(&id) {
let count = absence_tracker.entry(id.clone()).or_insert(0);
*count += 1;
if *count >= CONTAINER_ABSENCE_THRESHOLD {
debug!("Removing {} from state after {} consecutive absent scans", id, count);
merged.remove(&id);
absence_tracker.remove(&id);
changed = true;
}
}
}
if changed || tor_changed || first_scan || update_changed {
let mut data = current_data;
data.package_data = merged;
data.server_info.tor_address = tor_addr.clone();
data.server_info.node_address = tor_addr.as_ref().map(|t| identity.node_address(t));
data.server_info.status_info.containers_scanned = true;
data.server_info.status_info.updated = update_available;
state.update_data(data).await;
debug!("📦 State changed (packages={}, tor={}, first_scan={}, update={}), broadcasting update", packages_changed, tor_changed, first_scan, update_changed);
debug!("📦 State changed (packages={}, tor={}, first_scan={}, update={}), broadcasting update", changed, tor_changed, first_scan, update_changed);
}
Ok(())

View File

@@ -13,7 +13,7 @@ use thiserror::Error;
use tokio::net::UnixStream;
const API_VERSION: &str = "v4.0.0";
const DEFAULT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
const DEFAULT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);
const LONG_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(120);
#[derive(Debug, Error)]