Improve Pimox worker boot diagnostics
Homelab Main / deploy (push) Has been cancelled
Details
Homelab Main / deploy (push) Has been cancelled
Details
This commit is contained in:
parent
14b784ef9a
commit
8a55d14542
62
lab.sh
62
lab.sh
|
|
@ -120,7 +120,7 @@ pimox_guest_ipv4() {
|
|||
local key_path="$3"
|
||||
local vmid="$4"
|
||||
local ip_prefix="$5"
|
||||
local qm_bin="${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}"
|
||||
local qm_bin="${6:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}"
|
||||
|
||||
guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)"
|
||||
if [[ -z "${guest_json}" ]]; then
|
||||
|
|
@ -153,6 +153,22 @@ sys.exit(1)
|
|||
PY
|
||||
}
|
||||
|
||||
#######################################
# Dump best-effort diagnostics for a Pimox worker VM to stderr.
#
# Runs a short script on the Pimox host via pimox_ssh that prints the VM
# status, a filtered config summary (agent/boot/net0/disk keys), and the raw
# guest-agent network-get-interfaces reply.
#
# Globals:
#   LAB_PIMOX_QM_BIN (read) - fallback qm path when $5 is unset or empty
# Arguments:
#   $1 - Pimox host passed through to pimox_ssh
#   $2 - SSH user passed through to pimox_ssh
#   $3 - SSH key path passed through to pimox_ssh
#   $4 - VM id to inspect
#   $5 - (optional) path to the qm binary on the host; defaults to
#        ${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}, matching the sibling helpers
# Outputs:
#   Everything the remote script prints is redirected to stderr.
# Returns:
#   Always 0; a failing SSH call or remote command is deliberately ignored
#   because this is only called to explain an error that already happened.
#######################################
pimox_worker_vm_debug() {
  local host="$1"
  local user="$2"
  local key_path="$3"
  local vmid="$4"
  # Same default chain as pimox_guest_ipv4 / wait_for_pimox_guest_ssh so a
  # caller that omits the binary does not end up running "sudo ''".
  local qm_bin="${5:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}"

  # The remote script starts with `set +e` so one failing qm call does not
  # prevent the remaining diagnostics from being printed.
  pimox_ssh "${host}" "${user}" "${key_path}" "set +e
echo 'Pimox VM ${vmid} status:'
sudo '${qm_bin}' status '${vmid}'
echo 'Pimox VM ${vmid} config summary:'
sudo '${qm_bin}' config '${vmid}' | grep -E '^(agent|boot|net0|scsi0|virtio0|sata0|ide0|ide2|efidisk0):' || true
echo 'Pimox VM ${vmid} guest-agent network-get-interfaces:'
sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" >&2 || true
}
|
||||
|
||||
wait_for_pimox_guest_ssh() {
|
||||
local host="$1"
|
||||
local user="$2"
|
||||
|
|
@ -162,20 +178,56 @@ wait_for_pimox_guest_ssh() {
|
|||
local guest_key_path="$6"
|
||||
local ip_prefix="$7"
|
||||
local timeout_seconds="$8"
|
||||
local qm_bin="${9:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}"
|
||||
local deadline
|
||||
local elapsed
|
||||
local guest_ip
|
||||
local ip_filter_description
|
||||
local last_guest_ip=""
|
||||
local last_ssh_output=""
|
||||
local next_log
|
||||
local ssh_output
|
||||
|
||||
ip_filter_description="matching prefix ${ip_prefix}"
|
||||
if [[ -z "${ip_prefix}" ]]; then
|
||||
ip_filter_description="that is not loopback or link-local"
|
||||
fi
|
||||
|
||||
deadline=$((SECONDS + timeout_seconds))
|
||||
next_log="${SECONDS}"
|
||||
while ((SECONDS < deadline)); do
|
||||
guest_ip="$(pimox_guest_ipv4 "${host}" "${user}" "${key_path}" "${vmid}" "${ip_prefix}" || true)"
|
||||
if [[ -n "${guest_ip}" ]] &&
|
||||
ssh -i "${guest_key_path}" -o BatchMode=yes -o ConnectTimeout=8 -o StrictHostKeyChecking=accept-new "${guest_user}@${guest_ip}" true >/dev/null 2>&1; then
|
||||
guest_ip="$(pimox_guest_ipv4 "${host}" "${user}" "${key_path}" "${vmid}" "${ip_prefix}" "${qm_bin}" || true)"
|
||||
if [[ -n "${guest_ip}" ]]; then
|
||||
last_guest_ip="${guest_ip}"
|
||||
if ssh_output="$(ssh -i "${guest_key_path}" -o BatchMode=yes -o ConnectTimeout=8 -o StrictHostKeyChecking=accept-new "${guest_user}@${guest_ip}" true 2>&1)"; then
|
||||
printf '%s\n' "${guest_ip}"
|
||||
return 0
|
||||
fi
|
||||
last_ssh_output="${ssh_output}"
|
||||
fi
|
||||
|
||||
if ((SECONDS >= next_log)); then
|
||||
elapsed=$((timeout_seconds - (deadline - SECONDS)))
|
||||
if [[ -n "${last_guest_ip}" ]]; then
|
||||
echo "Waiting for SSH to worker VM ${vmid} at ${last_guest_ip} as ${guest_user} (${elapsed}s elapsed)..." >&2
|
||||
else
|
||||
echo "Waiting for worker VM ${vmid} to report an IPv4 address ${ip_filter_description} through qemu-guest-agent (${elapsed}s elapsed)..." >&2
|
||||
fi
|
||||
next_log=$((SECONDS + 60))
|
||||
fi
|
||||
sleep 10
|
||||
done
|
||||
|
||||
if [[ -n "${last_guest_ip}" ]]; then
|
||||
echo "Worker VM ${vmid} reported guest IP ${last_guest_ip}, but SSH as ${guest_user} never became reachable." >&2
|
||||
if [[ -n "${last_ssh_output}" ]]; then
|
||||
echo "Last SSH failure: ${last_ssh_output}" >&2
|
||||
fi
|
||||
else
|
||||
echo "Worker VM ${vmid} did not report an IPv4 address ${ip_filter_description} through qemu-guest-agent." >&2
|
||||
fi
|
||||
pimox_worker_vm_debug "${host}" "${user}" "${key_path}" "${vmid}" "${qm_bin}"
|
||||
|
||||
return 1
|
||||
}
|
||||
|
||||
|
|
@ -358,7 +410,7 @@ sudo '${qm_bin}' set '${vmid}' --onboot 1
|
|||
sudo '${qm_bin}' start '${vmid}'"
|
||||
fi
|
||||
|
||||
if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}")"; then
|
||||
if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}" "${qm_bin}")"; then
|
||||
echo "Timed out waiting for worker VM ${vmid} (${worker_name}) to report a reachable guest IP." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
|
|
|||
Loading…
Reference in New Issue