Tighten Pimox readiness checks

This commit is contained in:
juvdiaz 2026-05-26 12:51:09 -06:00
parent 3a627b6428
commit 928e548fc9
3 changed files with 95 additions and 38 deletions

View File

@ -123,6 +123,7 @@ resource "null_resource" "pimox_template_vm_create" {
pimox_host = var.pimox_host
pimox_user = var.pimox_user
ssh_key_path = var.pimox_ssh_key_path
qm_bin = var.pimox_qm_bin
builder_version = "2"
vmid = tostring(var.pimox_template_vmid)
name = var.pimox_template_name
@ -149,39 +150,49 @@ set -eu
vmid="${self.triggers.vmid}"
replace_existing="${self.triggers.replace_existing}"
qm_cmd="${self.triggers.qm_bin}"
if ! command -v qm >/dev/null 2>&1; then
if [ ! -x "$qm_cmd" ]; then
qm_cmd="$(command -v qm 2>/dev/null || true)"
fi
if [ -z "$qm_cmd" ]; then
echo "qm is not installed on this Pimox host" >&2
exit 1
fi
if ! sudo -n true >/dev/null 2>&1; then
echo "passwordless sudo is required for Pimox automation" >&2
exit 1
fi
if ! ip link show "${self.triggers.bridge}" >/dev/null 2>&1; then
echo "Pimox bridge ${self.triggers.bridge} does not exist. Refusing to change Orange Pi networking." >&2
exit 1
fi
if sudo qm status "$vmid" >/dev/null 2>&1; then
if sudo qm config "$vmid" | grep -q '^template: 1$'; then
sudo qm set "$vmid" --agent enabled=1
if sudo "$qm_cmd" status "$vmid" >/dev/null 2>&1; then
if sudo "$qm_cmd" config "$vmid" | grep -q '^template: 1$'; then
sudo "$qm_cmd" set "$vmid" --agent enabled=1
exit 0
fi
if [ "$replace_existing" != "true" ]; then
echo "VM $vmid already exists and is not a template. Set pimox_template_replace_existing=true to rebuild it." >&2
exit 1
fi
sudo qm stop "$vmid" >/dev/null 2>&1 || true
sudo "$qm_cmd" stop "$vmid" >/dev/null 2>&1 || true
elapsed=0
while [ "$elapsed" -lt 300 ]; do
if sudo qm status "$vmid" | grep -q 'status: stopped'; then
if sudo "$qm_cmd" status "$vmid" | grep -q 'status: stopped'; then
break
fi
sleep 5
elapsed=$((elapsed + 5))
done
sudo qm destroy "$vmid" --purge 1 >/dev/null 2>&1 || sudo qm destroy "$vmid"
sudo "$qm_cmd" destroy "$vmid" --purge 1 >/dev/null 2>&1 || sudo "$qm_cmd" destroy "$vmid"
fi
sudo qm create "$vmid" \
sudo "$qm_cmd" create "$vmid" \
--name "${self.triggers.name}" \
--bios ovmf \
--boot "order=net0;scsi0" \
@ -195,10 +206,10 @@ sudo qm create "$vmid" \
--vga virtio \
--agent enabled=1
sudo qm set "$vmid" --efidisk0 "${self.triggers.efidisk0}"
sudo qm set "$vmid" --scsi0 "${self.triggers.scsi0}"
sudo qm set "$vmid" --agent enabled=1
sudo qm start "$vmid"
sudo "$qm_cmd" set "$vmid" --efidisk0 "${self.triggers.efidisk0}"
sudo "$qm_cmd" set "$vmid" --scsi0 "${self.triggers.scsi0}"
sudo "$qm_cmd" set "$vmid" --agent enabled=1
sudo "$qm_cmd" start "$vmid"
EOT
]
}
@ -213,6 +224,7 @@ resource "null_resource" "pimox_template_vm_seal" {
pimox_host = var.pimox_host
pimox_user = var.pimox_user
pimox_key_path = var.pimox_ssh_key_path
pimox_qm_bin = var.pimox_qm_bin
guest_host = var.pimox_template_build_host
guest_user = var.pimox_template_build_user
guest_key_path = var.pimox_template_build_ssh_key_path
@ -231,6 +243,7 @@ set -euo pipefail
pimox_host="${self.triggers.pimox_host}"
pimox_user="${self.triggers.pimox_user}"
pimox_key="${self.triggers.pimox_key_path}"
pimox_qm_bin="${self.triggers.pimox_qm_bin}"
guest_host="${self.triggers.guest_host}"
guest_user="${self.triggers.guest_user}"
guest_key="${self.triggers.guest_key_path}"
@ -252,7 +265,7 @@ ssh_guest() {
}
guest_ip_from_agent() {
guest_json="$(ssh_pimox "sudo qm guest cmd '$vmid' network-get-interfaces" 2>/dev/null || true)"
guest_json="$(ssh_pimox "sudo '$pimox_qm_bin' guest cmd '$vmid' network-get-interfaces" 2>/dev/null || true)"
if [ -z "$guest_json" ]; then
return 1
fi
@ -282,8 +295,8 @@ sys.exit(1)
PY
}
if ssh_pimox "sudo qm config '$vmid' | grep -q '^template: 1$'"; then
ssh_pimox "sudo qm set '$vmid' --agent enabled=1"
if ssh_pimox "sudo '$pimox_qm_bin' config '$vmid' | grep -q '^template: 1$'"; then
ssh_pimox "sudo '$pimox_qm_bin' set '$vmid' --agent enabled=1"
exit 0
fi
@ -323,6 +336,7 @@ resource "null_resource" "pimox_template_vm_finalize" {
pimox_host = var.pimox_host
pimox_user = var.pimox_user
ssh_key_path = var.pimox_ssh_key_path
qm_bin = var.pimox_qm_bin
finalizer_version = "2"
vmid = tostring(var.pimox_template_vmid)
}
@ -340,27 +354,38 @@ resource "null_resource" "pimox_template_vm_finalize" {
set -eu
vmid="${self.triggers.vmid}"
if sudo qm config "$vmid" | grep -q '^template: 1$'; then
sudo qm set "$vmid" --agent enabled=1
qm_cmd="${self.triggers.qm_bin}"
if [ ! -x "$qm_cmd" ]; then
qm_cmd="$(command -v qm 2>/dev/null || true)"
fi
if [ -z "$qm_cmd" ]; then
echo "qm is not installed on this Pimox host" >&2
exit 1
fi
if sudo "$qm_cmd" config "$vmid" | grep -q '^template: 1$'; then
sudo "$qm_cmd" set "$vmid" --agent enabled=1
exit 0
fi
elapsed=0
while [ "$elapsed" -lt 600 ]; do
if sudo qm status "$vmid" | grep -q 'status: stopped'; then
if sudo "$qm_cmd" status "$vmid" | grep -q 'status: stopped'; then
break
fi
sleep 5
elapsed=$((elapsed + 5))
done
if ! sudo qm status "$vmid" | grep -q 'status: stopped'; then
if ! sudo "$qm_cmd" status "$vmid" | grep -q 'status: stopped'; then
echo "Timed out waiting for VM $vmid to stop before template conversion" >&2
exit 1
fi
sudo qm set "$vmid" --boot "order=scsi0;net0"
sudo qm template "$vmid"
sudo "$qm_cmd" set "$vmid" --boot "order=scsi0;net0"
sudo "$qm_cmd" template "$vmid"
EOT
]
}

View File

@ -174,6 +174,11 @@ variable "pimox_ssh_key_path" {
default = "/home/jv/.ssh/id_ed25519"
}
# Path to the Proxmox `qm` CLI on the Pimox host. It is passed to the template
# provisioners through their `triggers` maps and used as the command run under sudo.
# The create and finalize scripts fall back to `command -v qm` when this path is
# not executable on the host.
# NOTE(review): the seal step appears to invoke this path as-is, with no
# fallback — confirm the value is a valid executable path on the host.
# lab.sh exports TF_VAR_pimox_qm_bin, which overrides the default below.
variable "pimox_qm_bin" {
type = string
default = "/usr/sbin/qm"
}
variable "pimox_template_vmid" {
type = number
default = 9000

61
lab.sh
View File

@ -100,8 +100,9 @@ pimox_guest_ipv4() {
local key_path="$3"
local vmid="$4"
local ip_prefix="$5"
local qm_bin="${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}"
guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo qm guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)"
guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)"
if [[ -z "${guest_json}" ]]; then
return 1
fi
@ -185,6 +186,7 @@ ensure_pimox_worker_node() {
local worker_key_path="${15}"
local ip_prefix="${16}"
local timeout_seconds="${17}"
local qm_bin="${18}"
local padded
local vmid
local worker_key
@ -200,26 +202,26 @@ ensure_pimox_worker_node() {
node_name="${worker_node_prefix}-${padded}"
mac="$(pimox_generated_mac "${vmid}")"
if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm status '${vmid}' >/dev/null 2>&1"; then
if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm config '${vmid}' | grep -q '^template: 1$'"; then
if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' status '${vmid}' >/dev/null 2>&1"; then
if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${vmid}' | grep -q '^template: 1$'"; then
echo "VM ${vmid} exists as a template; refusing to reuse it as worker ${worker_name}." >&2
exit 1
fi
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm set '${vmid}' --agent enabled=1
if sudo qm status '${vmid}' | grep -q 'status: stopped'; then sudo qm start '${vmid}'; fi"
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${vmid}' --agent enabled=1
if sudo '${qm_bin}' status '${vmid}' | grep -q 'status: stopped'; then sudo '${qm_bin}' start '${vmid}'; fi"
else
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu
if ! ip link show '${bridge}' >/dev/null 2>&1; then
echo 'Pimox bridge ${bridge} does not exist. Refusing to change Orange Pi networking.' >&2
exit 1
fi
sudo qm clone '${template_vmid}' '${vmid}' --name '${worker_name}' --full 1
sudo qm set '${vmid}' --agent enabled=1
sudo qm set '${vmid}' --cores '${worker_cores}' --memory '${worker_memory}'
sudo qm set '${vmid}' --net0 'virtio=${mac},bridge=${bridge}'
sudo qm set '${vmid}' --boot 'order=scsi0;net0'
sudo qm set '${vmid}' --onboot 1
sudo qm start '${vmid}'"
sudo '${qm_bin}' clone '${template_vmid}' '${vmid}' --name '${worker_name}' --full 1
sudo '${qm_bin}' set '${vmid}' --agent enabled=1
sudo '${qm_bin}' set '${vmid}' --cores '${worker_cores}' --memory '${worker_memory}'
sudo '${qm_bin}' set '${vmid}' --net0 'virtio=${mac},bridge=${bridge}'
sudo '${qm_bin}' set '${vmid}' --boot 'order=scsi0;net0'
sudo '${qm_bin}' set '${vmid}' --onboot 1
sudo '${qm_bin}' start '${vmid}'"
fi
if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}")"; then
@ -279,6 +281,7 @@ run_pimox_pipeline() {
local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}"
local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}"
local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}"
local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}"
local bridge="${LAB_PIMOX_BRIDGE:-${TF_VAR_pimox_template_bridge:-vmbr0}}"
local template_vmid="${LAB_PIMOX_TEMPLATE_VMID:-${TF_VAR_pimox_template_vmid:-9000}}"
local template_name="${LAB_PIMOX_TEMPLATE_NAME:-${TF_VAR_pimox_template_name:-debian13-arm64-k8s-template}}"
@ -297,22 +300,44 @@ run_pimox_pipeline() {
local spec_file="${REPO_ROOT}/.lab/pimox-workers.tsv"
local var_file="${REPO_ROOT}/.lab/cluster-workers.auto.tfvars.json"
local index
local readiness_output
local readiness_status
if disabled_value "${mode}"; then
return 0
fi
if [[ "${mode}" == "auto" && -n "${LAB_PIMOX_WORKER_COUNT+x}" ]]; then
mode="true"
fi
if ! [[ "${worker_count}" =~ ^[0-9]+$ ]]; then
echo "LAB_PIMOX_WORKER_COUNT must be a non-negative integer." >&2
exit 1
fi
if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "command -v qm >/dev/null 2>&1 && ip link show '${bridge}' >/dev/null 2>&1"; then
set +e
readiness_output="$(pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu
if ! { command -v qm >/dev/null 2>&1 || [ -x '${qm_bin}' ]; }; then
echo 'qm was not found in PATH and ${qm_bin} is not executable'
exit 1
fi
if ! ip link show '${bridge}' >/dev/null 2>&1; then
echo 'bridge ${bridge} was not found'
exit 1
fi
if ! sudo -n true >/dev/null 2>&1; then
echo 'passwordless sudo is not available for ${pimox_user}'
exit 1
fi" 2>&1)"
readiness_status=$?
set -e
if ((readiness_status != 0)); then
if [[ "${mode}" == "auto" ]]; then
echo "Skipping Pimox automation because ${pimox_user}@${pimox_host} with bridge ${bridge} is not ready."
return 0
fi
echo "Pimox automation requested, but ${pimox_user}@${pimox_host} is not reachable or bridge ${bridge} is missing." >&2
echo "Pimox automation requested, but ${pimox_user}@${pimox_host} is not ready: ${readiness_output}" >&2
exit 1
fi
@ -327,6 +352,7 @@ run_pimox_pipeline() {
export TF_VAR_pimox_host="${pimox_host}"
export TF_VAR_pimox_user="${pimox_user}"
export TF_VAR_pimox_ssh_key_path="${pimox_key}"
export TF_VAR_pimox_qm_bin="${qm_bin}"
export TF_VAR_pimox_template_bridge="${bridge}"
export TF_VAR_pimox_template_vmid="${template_vmid}"
export TF_VAR_pimox_template_name="${template_name}"
@ -343,11 +369,11 @@ run_pimox_pipeline() {
return 0
fi
if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm config '${template_vmid}' | grep -q '^template: 1$'"; then
if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${template_vmid}' | grep -q '^template: 1$'"; then
echo "Template VM ${template_vmid} is not available as a Pimox template after provisioning." >&2
exit 1
fi
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm set '${template_vmid}' --agent enabled=1"
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${template_vmid}' --agent enabled=1"
mkdir -p "${REPO_ROOT}/.lab"
: >"${spec_file}"
@ -369,7 +395,8 @@ run_pimox_pipeline() {
"${worker_user}" \
"${worker_key_path}" \
"${ip_prefix}" \
"${timeout_seconds}"
"${timeout_seconds}" \
"${qm_bin}"
done
write_cluster_worker_var_file "${spec_file}" "${var_file}"