Improve Pimox worker boot diagnostics
Homelab Main / deploy (push) Has been cancelled
Details
Homelab Main / deploy (push) Has been cancelled
Details
This commit is contained in:
parent
14b784ef9a
commit
8a55d14542
66
lab.sh
66
lab.sh
|
|
@ -120,7 +120,7 @@ pimox_guest_ipv4() {
|
||||||
local key_path="$3"
|
local key_path="$3"
|
||||||
local vmid="$4"
|
local vmid="$4"
|
||||||
local ip_prefix="$5"
|
local ip_prefix="$5"
|
||||||
local qm_bin="${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}"
|
local qm_bin="${6:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}"
|
||||||
|
|
||||||
guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)"
|
guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)"
|
||||||
if [[ -z "${guest_json}" ]]; then
|
if [[ -z "${guest_json}" ]]; then
|
||||||
|
|
@ -153,6 +153,22 @@ sys.exit(1)
|
||||||
PY
|
PY
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Print best-effort diagnostics for a Pimox worker VM to stderr.
#
# Hands a small script to pimox_ssh that reports the VM status, a filtered
# view of its config (agent, boot, NIC and disk keys only) and the
# guest-agent's network interface listing.
# NOTE(review): pimox_ssh is defined elsewhere; presumably it executes the
# script string in a shell on the Pimox host -- confirm.
#
# Globals:
#   LAB_PIMOX_QM_BIN (read) - fallback qm path when $5 is missing or empty
# Arguments:
#   $1 - Pimox host
#   $2 - SSH user for the Pimox host
#   $3 - path to the SSH key for the Pimox host
#   $4 - VM id to inspect
#   $5 - (optional) path to the qm binary; defaults to LAB_PIMOX_QM_BIN,
#        then /usr/sbin/qm, matching the other pimox_* helpers
# Outputs:
#   Everything pimox_ssh prints on stdout is redirected to stderr.
# Returns:
#   Always 0; a failing pimox_ssh call is ignored on purpose.
#######################################
pimox_worker_vm_debug() {
  local host="$1"
  local user="$2"
  local key_path="$3"
  local vmid="$4"
  # Same fallback chain as pimox_guest_ipv4 / wait_for_pimox_guest_ssh, so a
  # missing or empty argument never produces "sudo '' status ...".
  local qm_bin="${5:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}"

  # The script starts with "set +e" so a failing command does not stop the
  # later ones. Its lines stay at column 0 because they are part of the
  # string that is sent, not local code.
  pimox_ssh "${host}" "${user}" "${key_path}" "set +e
echo 'Pimox VM ${vmid} status:'
sudo '${qm_bin}' status '${vmid}'
echo 'Pimox VM ${vmid} config summary:'
sudo '${qm_bin}' config '${vmid}' | grep -E '^(agent|boot|net0|scsi0|virtio0|sata0|ide0|ide2|efidisk0):' || true
echo 'Pimox VM ${vmid} guest-agent network-get-interfaces:'
sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" >&2 || true
}
|
||||||
|
|
||||||
wait_for_pimox_guest_ssh() {
|
wait_for_pimox_guest_ssh() {
|
||||||
local host="$1"
|
local host="$1"
|
||||||
local user="$2"
|
local user="$2"
|
||||||
|
|
@ -162,20 +178,56 @@ wait_for_pimox_guest_ssh() {
|
||||||
local guest_key_path="$6"
|
local guest_key_path="$6"
|
||||||
local ip_prefix="$7"
|
local ip_prefix="$7"
|
||||||
local timeout_seconds="$8"
|
local timeout_seconds="$8"
|
||||||
|
local qm_bin="${9:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}"
|
||||||
local deadline
|
local deadline
|
||||||
|
local elapsed
|
||||||
local guest_ip
|
local guest_ip
|
||||||
|
local ip_filter_description
|
||||||
|
local last_guest_ip=""
|
||||||
|
local last_ssh_output=""
|
||||||
|
local next_log
|
||||||
|
local ssh_output
|
||||||
|
|
||||||
|
ip_filter_description="matching prefix ${ip_prefix}"
|
||||||
|
if [[ -z "${ip_prefix}" ]]; then
|
||||||
|
ip_filter_description="that is not loopback or link-local"
|
||||||
|
fi
|
||||||
|
|
||||||
deadline=$((SECONDS + timeout_seconds))
|
deadline=$((SECONDS + timeout_seconds))
|
||||||
|
next_log="${SECONDS}"
|
||||||
while ((SECONDS < deadline)); do
|
while ((SECONDS < deadline)); do
|
||||||
guest_ip="$(pimox_guest_ipv4 "${host}" "${user}" "${key_path}" "${vmid}" "${ip_prefix}" || true)"
|
guest_ip="$(pimox_guest_ipv4 "${host}" "${user}" "${key_path}" "${vmid}" "${ip_prefix}" "${qm_bin}" || true)"
|
||||||
if [[ -n "${guest_ip}" ]] &&
|
if [[ -n "${guest_ip}" ]]; then
|
||||||
ssh -i "${guest_key_path}" -o BatchMode=yes -o ConnectTimeout=8 -o StrictHostKeyChecking=accept-new "${guest_user}@${guest_ip}" true >/dev/null 2>&1; then
|
last_guest_ip="${guest_ip}"
|
||||||
printf '%s\n' "${guest_ip}"
|
if ssh_output="$(ssh -i "${guest_key_path}" -o BatchMode=yes -o ConnectTimeout=8 -o StrictHostKeyChecking=accept-new "${guest_user}@${guest_ip}" true 2>&1)"; then
|
||||||
return 0
|
printf '%s\n' "${guest_ip}"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
last_ssh_output="${ssh_output}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ((SECONDS >= next_log)); then
|
||||||
|
elapsed=$((timeout_seconds - (deadline - SECONDS)))
|
||||||
|
if [[ -n "${last_guest_ip}" ]]; then
|
||||||
|
echo "Waiting for SSH to worker VM ${vmid} at ${last_guest_ip} as ${guest_user} (${elapsed}s elapsed)..." >&2
|
||||||
|
else
|
||||||
|
echo "Waiting for worker VM ${vmid} to report an IPv4 address ${ip_filter_description} through qemu-guest-agent (${elapsed}s elapsed)..." >&2
|
||||||
|
fi
|
||||||
|
next_log=$((SECONDS + 60))
|
||||||
fi
|
fi
|
||||||
sleep 10
|
sleep 10
|
||||||
done
|
done
|
||||||
|
|
||||||
|
if [[ -n "${last_guest_ip}" ]]; then
|
||||||
|
echo "Worker VM ${vmid} reported guest IP ${last_guest_ip}, but SSH as ${guest_user} never became reachable." >&2
|
||||||
|
if [[ -n "${last_ssh_output}" ]]; then
|
||||||
|
echo "Last SSH failure: ${last_ssh_output}" >&2
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Worker VM ${vmid} did not report an IPv4 address ${ip_filter_description} through qemu-guest-agent." >&2
|
||||||
|
fi
|
||||||
|
pimox_worker_vm_debug "${host}" "${user}" "${key_path}" "${vmid}" "${qm_bin}"
|
||||||
|
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -358,7 +410,7 @@ sudo '${qm_bin}' set '${vmid}' --onboot 1
|
||||||
sudo '${qm_bin}' start '${vmid}'"
|
sudo '${qm_bin}' start '${vmid}'"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}")"; then
|
if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}" "${qm_bin}")"; then
|
||||||
echo "Timed out waiting for worker VM ${vmid} (${worker_name}) to report a reachable guest IP." >&2
|
echo "Timed out waiting for worker VM ${vmid} (${worker_name}) to report a reachable guest IP." >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue