fix: image pull timeout actually triggers fallback

The previous timeout handler returned ExitStatus::default(), which reports
success on Linux, so the fallback never triggered. The handler now kills the
process, awaits its exit, and forces the fallback path on timeout.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dorian
2026-04-12 10:08:22 -04:00
parent b0656b068f
commit 877b3e4168

View File

@@ -612,19 +612,27 @@ impl RpcHandler {
}
// Timeout primary pull after 60s — if registry is down, fail fast to fallback
let status = tokio::time::timeout(
let timed_out;
let status = match tokio::time::timeout(
std::time::Duration::from_secs(60),
child.wait(),
)
.await
.unwrap_or_else(|_| {
// Timeout: kill the stuck process
let _ = child.kill();
tracing::warn!("Image pull timed out after 60s: {}", docker_image);
Ok(std::process::ExitStatus::default())
})
.context("Failed to wait for image pull")?;
if !status.success() {
{
Ok(result) => {
timed_out = false;
result.context("Failed to wait for image pull")?
}
Err(_) => {
// Timeout: kill the stuck process
tracing::warn!("Image pull timed out after 60s: {}", docker_image);
let _ = child.kill().await;
timed_out = true;
// Wait for process to actually exit after kill
child.wait().await.context("Failed to wait after kill")?
}
};
if timed_out || !status.success() {
// Try all configured fallback registries dynamically
match crate::container::registry::pull_from_registries(
&self.config.data_dir,