test(lifecycle): post-condition gate for use_quadlet_backends path
A six-test bats suite that validates what install_via_quadlet (Phase 3.2)
is supposed to leave behind:
* `.container` unit on disk in $XDG_CONFIG_HOME/containers/systemd/
with [Container] / [Service] / [Install] sections, Image= present,
and Restart=on-failure (the backend invariant — companions use Always)
* Phase 3.4 cross-check: any unit with HealthCmd= must also emit
Notify=healthy, otherwise systemctl start won't gate on health
* `systemctl --user is-active` returns 0 for the .service
* podman shows the container running
* the container's cgroup is under user.slice/, NOT under
archipelago.service — the kernel-level proof that FM3 cgroup
cascade SIGKILL is structurally fixed for this container
Auto-skips on every test when no backend Quadlet units exist (today's
default state, use_quadlet_backends=false) — so the suite is a no-op
on current fleet boxes and turns into a hard regression gate the
moment anyone flips the flag and reinstalls.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -57,7 +57,7 @@ v1.7.52 tags.
|
||||
| L0 unit | 628 | n/a | ● green |
|
||||
| L1 RPC | 70 | bitcoin-knots, lnd, electrumx, btcpay, mempool, fedimint, required-stack, package-update-smoke | ● for the 6 core apps |
|
||||
| L2 UI | 9 | ui-coverage | ● for dashboard + 7 proxy paths + bitcoin-ui:8334 |
|
||||
| L3 lifecycle survival | 8 | companion-survives-archipelago-restart, backend-survives-archipelago-restart, required-stack-destructive | ◐ companions ● ; backends ◐ regression-gate (will fail until Phase 3 Quadlet ships) |
|
||||
| L3 lifecycle survival | 14 | companion-survives-archipelago-restart, backend-survives-archipelago-restart, required-stack-destructive, use-quadlet-backends-install | ◐ companions ● ; backends ◐ regression-gate (will fail until Phase 3 Quadlet ships); quadlet post-condition gate ✅ skip-clean today, hard gate when flag flipped |
|
||||
| L4 browser journey | 0 | none | ○ not started |
|
||||
| L5 chaos | 0 | none | ○ not started |
|
||||
| L6 performance | 0 | none | ○ not started |
|
||||
|
||||
149
tests/lifecycle/bats/use-quadlet-backends-install.bats
Normal file
149
tests/lifecycle/bats/use-quadlet-backends-install.bats
Normal file
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env bats
|
||||
# tests/lifecycle/bats/use-quadlet-backends-install.bats
|
||||
#
|
||||
# Validates the post-condition of Phase 3.2's `use_quadlet_backends`
|
||||
# install path. When the orchestrator routed at least one backend
|
||||
# install through `install_via_quadlet`, this suite asserts that the
|
||||
# resulting state has the four properties the Phase 3 design promises:
|
||||
#
|
||||
# 1. A `.container` unit file exists in ~/.config/containers/systemd/
|
||||
# and is well-formed (required sections + directives).
|
||||
# 2. The corresponding `.service` is active under `systemctl --user`.
|
||||
# 3. The container is in `podman ps` (running).
|
||||
# 4. The container's cgroup is under `user.slice/...`, NOT under
|
||||
# `archipelago.service` — proving FM3 (cgroup cascade SIGKILL on
|
||||
# archipelago restart) is structurally fixed for that container.
|
||||
#
|
||||
# Auto-skips if no Quadlet-managed backend exists yet — so it runs as a
|
||||
# no-op on nodes where `use_quadlet_backends` is still false (today's
|
||||
# default), and turns into a hard regression gate as soon as anyone
|
||||
# flips the flag and reinstalls.
|
||||
#
|
||||
# Run on a node with rootless podman + systemd-user (every alpha-fleet
|
||||
# box). No env vars required: every check in this file is read-only, and
|
||||
# there is no cleanup step, so ARCHY_ALLOW_DESTRUCTIVE=1 is not consulted.
|
||||
|
||||
# Print the per-user Quadlet unit directory.
# Reads:   XDG_CONFIG_HOME (falls back to $HOME/.config when unset or empty)
# Outputs: "<config-home>/containers/systemd" on stdout
quadlet_dir() {
  local config_home="${XDG_CONFIG_HOME:-$HOME/.config}"
  printf '%s\n' "$config_home/containers/systemd"
}
|
||||
|
||||
# List Quadlet `.container` units that correspond to backend containers
|
||||
# (i.e., NOT companions like archy-*-ui, which already shipped via Quadlet
|
||||
# in v1.7.41 and have their own coverage in companion-survives-archipelago-
|
||||
# restart.bats). Echoes one container name per line; empty if none found.
|
||||
backend_quadlet_units() {
  # Outputs: one unit name (file name minus ".container") per line on stdout.
  # Returns: 0 always, including when the directory is missing or empty.
  local unit_dir unit_file unit_name
  unit_dir="$(quadlet_dir)"
  [[ -d "$unit_dir" ]] || return 0
  for unit_file in "$unit_dir"/*.container; do
    # An unmatched glob stays literal; skip it rather than emit a bogus name.
    [[ -e "$unit_file" ]] || continue
    # Strip the directory and the .container extension without forking.
    unit_name="${unit_file##*/}"
    unit_name="${unit_name%.container}"
    # Filter out archy-*-ui companions; they are not backends.
    [[ "$unit_name" == archy-*-ui ]] && continue
    printf '%s\n' "$unit_name"
  done
  return 0
}
|
||||
|
||||
# Read the cgroup path of a running container's main process. For
|
||||
# rootless podman the conmon-run target lands the container's pid1 in
|
||||
# the cgroup that owns its supervising .service.
|
||||
container_cgroup_path() {
  # Arguments: $1 - container name as understood by `podman inspect`
  # Outputs:   the cgroup v2 path of the container's main process on stdout
  # Returns:   1 when podman reports no live PID; otherwise awk's exit status
  local name="$1"
  local pid
  # stderr is dropped on purpose: a missing container is signalled by the
  # empty PID check below, not by podman's error text.
  pid="$(podman inspect --format '{{.State.Pid}}' "$name" 2>/dev/null)"
  # Empty, "0" (stopped container) or anything non-numeric means no live PID.
  [[ "$pid" =~ ^[1-9][0-9]*$ ]] || return 1
  # cgroup v2 line: "0::/path/to/cgroup". Strip only the leading "0::" instead
  # of splitting on ":" so a path that itself contains a colon stays intact.
  awk 'index($0, "0::") == 1 { print substr($0, 4) }' "/proc/$pid/cgroup" 2>/dev/null
}
|
||||
|
||||
# Per-test gate. Each @test calls this so the suite is a clean no-op on
|
||||
# nodes where use_quadlet_backends is still false (today's default) —
|
||||
# bats doesn't propagate setup-level skip semantics across @test blocks.
|
||||
require_quadlet_backends() {
  # Skips the calling test (via bats `skip`) when backend_quadlet_units
  # prints nothing; otherwise returns 0 and lets the test proceed.
  local units
  units="$(backend_quadlet_units)"
  [[ -n "$units" ]] || skip "no backend .container units in $(quadlet_dir) — use_quadlet_backends not enabled or no backends installed"
}
|
||||
|
||||
@test "Quadlet unit dir exists or is plausibly creatable" {
|
||||
local d
|
||||
d="$(quadlet_dir)"
|
||||
# Either it already exists, or its parent does (so quadlet can mkdir it).
|
||||
[[ -d "$d" ]] || [[ -d "$(dirname "$d")" ]] \
|
||||
|| skip "no XDG_CONFIG_HOME and no \$HOME/.config — not a desktop-style host"
|
||||
}
|
||||
|
||||
@test "each backend Quadlet unit has the required sections + directives" {
|
||||
require_quadlet_backends
|
||||
local d
|
||||
d="$(quadlet_dir)"
|
||||
while read -r name; do
|
||||
[[ -z "$name" ]] && continue
|
||||
local body
|
||||
body="$(<"$d/$name.container")"
|
||||
# [Container] section + Image=
|
||||
[[ "$body" == *"[Container]"* ]] || fail "$name: missing [Container] section"
|
||||
[[ "$body" == *"Image="* ]] || fail "$name: missing Image= directive"
|
||||
# [Service] section with the Phase 3.2 backend invariant: Restart=on-failure.
|
||||
# Companions use Restart=always; backends use on-failure so an operator-issued
|
||||
# `systemctl stop` actually stays stopped.
|
||||
[[ "$body" == *"[Service]"* ]] || fail "$name: missing [Service] section"
|
||||
[[ "$body" == *"Restart=on-failure"* ]] \
|
||||
|| fail "$name: backend unit must use Restart=on-failure (got companion-style Restart=always)"
|
||||
# [Install] section so `systemctl --user enable` is well-defined.
|
||||
[[ "$body" == *"[Install]"* ]] || fail "$name: missing [Install] section"
|
||||
[[ "$body" == *"WantedBy="* ]] || fail "$name: missing WantedBy= in [Install]"
|
||||
done < <(backend_quadlet_units)
|
||||
}
|
||||
|
||||
@test "Phase 3.4: any unit emitting HealthCmd= also emits Notify=healthy" {
|
||||
require_quadlet_backends
|
||||
local d
|
||||
d="$(quadlet_dir)"
|
||||
while read -r name; do
|
||||
[[ -z "$name" ]] && continue
|
||||
local body
|
||||
body="$(<"$d/$name.container")"
|
||||
if [[ "$body" == *"HealthCmd="* ]]; then
|
||||
[[ "$body" == *"Notify=healthy"* ]] \
|
||||
|| fail "$name: HealthCmd= present but Notify=healthy missing — systemctl start won't gate on health"
|
||||
fi
|
||||
done < <(backend_quadlet_units)
|
||||
}
|
||||
|
||||
@test "every backend Quadlet unit's .service is active in systemctl --user" {
|
||||
require_quadlet_backends
|
||||
while read -r name; do
|
||||
[[ -z "$name" ]] && continue
|
||||
run systemctl --user is-active "$name.service"
|
||||
[[ "$status" -eq 0 ]] || fail "$name.service is '$output' — expected 'active'"
|
||||
done < <(backend_quadlet_units)
|
||||
}
|
||||
|
||||
@test "every backend Quadlet unit has a running podman container" {
|
||||
require_quadlet_backends
|
||||
while read -r name; do
|
||||
[[ -z "$name" ]] && continue
|
||||
run sh -c "podman inspect --format '{{.State.Running}}' '$name'"
|
||||
[[ "$status" -eq 0 ]] || fail "$name not present in podman"
|
||||
[[ "$output" == "true" ]] || fail "$name container exists but not running (state=$output)"
|
||||
done < <(backend_quadlet_units)
|
||||
}
|
||||
|
||||
@test "FM3 fix: backend cgroup is under user.slice, not archipelago.service" {
|
||||
require_quadlet_backends
|
||||
# The whole point of Phase 3 — verify the kernel-level invariant.
|
||||
while read -r name; do
|
||||
[[ -z "$name" ]] && continue
|
||||
local cg
|
||||
cg="$(container_cgroup_path "$name")" || skip "$name has no readable PID; container may have crashed mid-test"
|
||||
[[ -n "$cg" ]] || fail "$name: empty cgroup path"
|
||||
# Acceptable: anything under user.slice (rootless podman lands here when
|
||||
# quadlet-managed). Forbidden: anything under archipelago.service's tree.
|
||||
[[ "$cg" == *"user.slice"* ]] \
|
||||
|| fail "$name: cgroup '$cg' is not under user.slice — FM3 cascade still possible"
|
||||
[[ "$cg" != *"archipelago.service"* ]] \
|
||||
|| fail "$name: cgroup '$cg' is under archipelago.service — Phase 3 promise broken"
|
||||
done < <(backend_quadlet_units)
|
||||
}
|
||||
Reference in New Issue
Block a user