From 928e548fc9bb5bb2e41e84e37da163194867c54f Mon Sep 17 00:00:00 2001 From: juvdiaz Date: Tue, 26 May 2026 12:51:09 -0600 Subject: [PATCH] Tighten Pimox readiness checks --- bootstrap/provisioning/main.tf | 67 ++++++++++++++++++++--------- bootstrap/provisioning/variables.tf | 5 +++ lab.sh | 61 ++++++++++++++++++-------- 3 files changed, 95 insertions(+), 38 deletions(-) diff --git a/bootstrap/provisioning/main.tf b/bootstrap/provisioning/main.tf index cd783ba..b4c5941 100644 --- a/bootstrap/provisioning/main.tf +++ b/bootstrap/provisioning/main.tf @@ -123,6 +123,7 @@ resource "null_resource" "pimox_template_vm_create" { pimox_host = var.pimox_host pimox_user = var.pimox_user ssh_key_path = var.pimox_ssh_key_path + qm_bin = var.pimox_qm_bin builder_version = "2" vmid = tostring(var.pimox_template_vmid) name = var.pimox_template_name @@ -149,39 +150,49 @@ set -eu vmid="${self.triggers.vmid}" replace_existing="${self.triggers.replace_existing}" +qm_cmd="${self.triggers.qm_bin}" -if ! command -v qm >/dev/null 2>&1; then +if [ ! -x "$qm_cmd" ]; then + qm_cmd="$(command -v qm 2>/dev/null || true)" +fi + +if [ -z "$qm_cmd" ]; then echo "qm is not installed on this Pimox host" >&2 exit 1 fi +if ! sudo -n true >/dev/null 2>&1; then + echo "passwordless sudo is required for Pimox automation" >&2 + exit 1 +fi + if ! ip link show "${self.triggers.bridge}" >/dev/null 2>&1; then echo "Pimox bridge ${self.triggers.bridge} does not exist. Refusing to change Orange Pi networking." >&2 exit 1 fi -if sudo qm status "$vmid" >/dev/null 2>&1; then - if sudo qm config "$vmid" | grep -q '^template: 1$'; then - sudo qm set "$vmid" --agent enabled=1 +if sudo "$qm_cmd" status "$vmid" >/dev/null 2>&1; then + if sudo "$qm_cmd" config "$vmid" | grep -q '^template: 1$'; then + sudo "$qm_cmd" set "$vmid" --agent enabled=1 exit 0 fi if [ "$replace_existing" != "true" ]; then echo "VM $vmid already exists and is not a template. Set pimox_template_replace_existing=true to rebuild it." >&2 exit 1 fi - sudo qm stop "$vmid" >/dev/null 2>&1 || true + sudo "$qm_cmd" stop "$vmid" >/dev/null 2>&1 || true elapsed=0 while [ "$elapsed" -lt 300 ]; do - if sudo qm status "$vmid" | grep -q 'status: stopped'; then + if sudo "$qm_cmd" status "$vmid" | grep -q 'status: stopped'; then break fi sleep 5 elapsed=$((elapsed + 5)) done - sudo qm destroy "$vmid" --purge 1 >/dev/null 2>&1 || sudo qm destroy "$vmid" + sudo "$qm_cmd" destroy "$vmid" --purge 1 >/dev/null 2>&1 || sudo "$qm_cmd" destroy "$vmid" fi -sudo qm create "$vmid" \ +sudo "$qm_cmd" create "$vmid" \ --name "${self.triggers.name}" \ --bios ovmf \ --boot "order=net0;scsi0" \ @@ -195,10 +206,10 @@ sudo qm create "$vmid" \ --vga virtio \ --agent enabled=1 -sudo qm set "$vmid" --efidisk0 "${self.triggers.efidisk0}" -sudo qm set "$vmid" --scsi0 "${self.triggers.scsi0}" -sudo qm set "$vmid" --agent enabled=1 -sudo qm start "$vmid" +sudo "$qm_cmd" set "$vmid" --efidisk0 "${self.triggers.efidisk0}" +sudo "$qm_cmd" set "$vmid" --scsi0 "${self.triggers.scsi0}" +sudo "$qm_cmd" set "$vmid" --agent enabled=1 +sudo "$qm_cmd" start "$vmid" EOT ] } @@ -213,6 +224,7 @@ resource "null_resource" "pimox_template_vm_seal" { pimox_host = var.pimox_host pimox_user = var.pimox_user pimox_key_path = var.pimox_ssh_key_path + pimox_qm_bin = var.pimox_qm_bin guest_host = var.pimox_template_build_host guest_user = var.pimox_template_build_user guest_key_path = var.pimox_template_build_ssh_key_path @@ -231,6 +243,7 @@ set -euo pipefail pimox_host="${self.triggers.pimox_host}" pimox_user="${self.triggers.pimox_user}" pimox_key="${self.triggers.pimox_key_path}" +pimox_qm_bin="${self.triggers.pimox_qm_bin}" guest_host="${self.triggers.guest_host}" guest_user="${self.triggers.guest_user}" guest_key="${self.triggers.guest_key_path}" @@ -252,7 +265,7 @@ ssh_guest() { } guest_ip_from_agent() { - guest_json="$(ssh_pimox "sudo qm guest cmd '$vmid' network-get-interfaces" 2>/dev/null || true)" + guest_json="$(ssh_pimox "sudo '$pimox_qm_bin' guest cmd '$vmid' network-get-interfaces" 2>/dev/null || true)" if [ -z "$guest_json" ]; then return 1 fi @@ -282,8 +295,8 @@ sys.exit(1) PY } -if ssh_pimox "sudo qm config '$vmid' | grep -q '^template: 1$'"; then - ssh_pimox "sudo qm set '$vmid' --agent enabled=1" +if ssh_pimox "sudo '$pimox_qm_bin' config '$vmid' | grep -q '^template: 1$'"; then + ssh_pimox "sudo '$pimox_qm_bin' set '$vmid' --agent enabled=1" exit 0 fi @@ -323,6 +336,7 @@ resource "null_resource" "pimox_template_vm_finalize" { pimox_host = var.pimox_host pimox_user = var.pimox_user ssh_key_path = var.pimox_ssh_key_path + qm_bin = var.pimox_qm_bin finalizer_version = "2" vmid = tostring(var.pimox_template_vmid) } @@ -340,27 +354,38 @@ resource "null_resource" "pimox_template_vm_finalize" { set -eu vmid="${self.triggers.vmid}" -if sudo qm config "$vmid" | grep -q '^template: 1$'; then - sudo qm set "$vmid" --agent enabled=1 +qm_cmd="${self.triggers.qm_bin}" + +if [ ! -x "$qm_cmd" ]; then + qm_cmd="$(command -v qm 2>/dev/null || true)" +fi + +if [ -z "$qm_cmd" ]; then + echo "qm is not installed on this Pimox host" >&2 + exit 1 +fi + +if sudo "$qm_cmd" config "$vmid" | grep -q '^template: 1$'; then + sudo "$qm_cmd" set "$vmid" --agent enabled=1 exit 0 fi elapsed=0 while [ "$elapsed" -lt 600 ]; do - if sudo qm status "$vmid" | grep -q 'status: stopped'; then + if sudo "$qm_cmd" status "$vmid" | grep -q 'status: stopped'; then break fi sleep 5 elapsed=$((elapsed + 5)) done -if ! sudo qm status "$vmid" | grep -q 'status: stopped'; then +if ! sudo "$qm_cmd" status "$vmid" | grep -q 'status: stopped'; then echo "Timed out waiting for VM $vmid to stop before template conversion" >&2 exit 1 fi -sudo qm set "$vmid" --boot "order=scsi0;net0" -sudo qm template "$vmid" +sudo "$qm_cmd" set "$vmid" --boot "order=scsi0;net0" +sudo "$qm_cmd" template "$vmid" EOT ] } diff --git a/bootstrap/provisioning/variables.tf b/bootstrap/provisioning/variables.tf index 2322559..b31be75 100644 --- a/bootstrap/provisioning/variables.tf +++ b/bootstrap/provisioning/variables.tf @@ -174,6 +174,11 @@ variable "pimox_ssh_key_path" { default = "/home/jv/.ssh/id_ed25519" } +variable "pimox_qm_bin" { + type = string + default = "/usr/sbin/qm" +} + variable "pimox_template_vmid" { type = number default = 9000 diff --git a/lab.sh b/lab.sh index ef771d7..ca8e7a2 100755 --- a/lab.sh +++ b/lab.sh @@ -100,8 +100,9 @@ pimox_guest_ipv4() { local key_path="$3" local vmid="$4" local ip_prefix="$5" + local qm_bin="${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}" - guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo qm guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)" + guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)" if [[ -z "${guest_json}" ]]; then return 1 fi @@ -185,6 +186,7 @@ ensure_pimox_worker_node() { local worker_key_path="${15}" local ip_prefix="${16}" local timeout_seconds="${17}" + local qm_bin="${18}" local padded local vmid local worker_key @@ -200,26 +202,26 @@ ensure_pimox_worker_node() { node_name="${worker_node_prefix}-${padded}" mac="$(pimox_generated_mac "${vmid}")" - if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm status '${vmid}' >/dev/null 2>&1"; then - if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm config '${vmid}' | grep -q '^template: 1$'"; then + if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' status '${vmid}' >/dev/null 2>&1"; then + if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${vmid}' | grep -q '^template: 1$'"; then echo "VM ${vmid} exists as a template; refusing to reuse it as worker ${worker_name}." >&2 exit 1 fi - pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm set '${vmid}' --agent enabled=1 -if sudo qm status '${vmid}' | grep -q 'status: stopped'; then sudo qm start '${vmid}'; fi" + pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${vmid}' --agent enabled=1 +if sudo '${qm_bin}' status '${vmid}' | grep -q 'status: stopped'; then sudo '${qm_bin}' start '${vmid}'; fi" else pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu if ! ip link show '${bridge}' >/dev/null 2>&1; then echo 'Pimox bridge ${bridge} does not exist. Refusing to change Orange Pi networking.' >&2 exit 1 fi -sudo qm clone '${template_vmid}' '${vmid}' --name '${worker_name}' --full 1 -sudo qm set '${vmid}' --agent enabled=1 -sudo qm set '${vmid}' --cores '${worker_cores}' --memory '${worker_memory}' -sudo qm set '${vmid}' --net0 'virtio=${mac},bridge=${bridge}' -sudo qm set '${vmid}' --boot 'order=scsi0;net0' -sudo qm set '${vmid}' --onboot 1 -sudo qm start '${vmid}'" +sudo '${qm_bin}' clone '${template_vmid}' '${vmid}' --name '${worker_name}' --full 1 +sudo '${qm_bin}' set '${vmid}' --agent enabled=1 +sudo '${qm_bin}' set '${vmid}' --cores '${worker_cores}' --memory '${worker_memory}' +sudo '${qm_bin}' set '${vmid}' --net0 'virtio=${mac},bridge=${bridge}' +sudo '${qm_bin}' set '${vmid}' --boot 'order=scsi0;net0' +sudo '${qm_bin}' set '${vmid}' --onboot 1 +sudo '${qm_bin}' start '${vmid}'" fi if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}")"; then @@ -279,6 +281,7 @@ run_pimox_pipeline() { local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}" local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}" local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}" + local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}" local bridge="${LAB_PIMOX_BRIDGE:-${TF_VAR_pimox_template_bridge:-vmbr0}}" local template_vmid="${LAB_PIMOX_TEMPLATE_VMID:-${TF_VAR_pimox_template_vmid:-9000}}" local template_name="${LAB_PIMOX_TEMPLATE_NAME:-${TF_VAR_pimox_template_name:-debian13-arm64-k8s-template}}" @@ -297,22 +300,44 @@ run_pimox_pipeline() { local spec_file="${REPO_ROOT}/.lab/pimox-workers.tsv" local var_file="${REPO_ROOT}/.lab/cluster-workers.auto.tfvars.json" local index + local readiness_output + local readiness_status if disabled_value "${mode}"; then return 0 fi + if [[ "${mode}" == "auto" && -n "${LAB_PIMOX_WORKER_COUNT+x}" ]]; then + mode="true" + fi + if ! [[ "${worker_count}" =~ ^[0-9]+$ ]]; then echo "LAB_PIMOX_WORKER_COUNT must be a non-negative integer." >&2 exit 1 fi - if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "command -v qm >/dev/null 2>&1 && ip link show '${bridge}' >/dev/null 2>&1"; then + set +e + readiness_output="$(pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu +if ! { command -v qm >/dev/null 2>&1 || [ -x '${qm_bin}' ]; }; then + echo 'qm was not found in PATH and ${qm_bin} is not executable' + exit 1 +fi +if ! ip link show '${bridge}' >/dev/null 2>&1; then + echo 'bridge ${bridge} was not found' + exit 1 +fi +if ! sudo -n true >/dev/null 2>&1; then + echo 'passwordless sudo is not available for ${pimox_user}' + exit 1 +fi" 2>&1)" + readiness_status=$? + set -e + if ((readiness_status != 0)); then if [[ "${mode}" == "auto" ]]; then echo "Skipping Pimox automation because ${pimox_user}@${pimox_host} with bridge ${bridge} is not ready." return 0 fi - echo "Pimox automation requested, but ${pimox_user}@${pimox_host} is not reachable or bridge ${bridge} is missing." >&2 + echo "Pimox automation requested, but ${pimox_user}@${pimox_host} is not ready: ${readiness_output}" >&2 exit 1 fi @@ -327,6 +352,7 @@ run_pimox_pipeline() { export TF_VAR_pimox_host="${pimox_host}" export TF_VAR_pimox_user="${pimox_user}" export TF_VAR_pimox_ssh_key_path="${pimox_key}" + export TF_VAR_pimox_qm_bin="${qm_bin}" export TF_VAR_pimox_template_bridge="${bridge}" export TF_VAR_pimox_template_vmid="${template_vmid}" export TF_VAR_pimox_template_name="${template_name}" @@ -343,11 +369,11 @@ run_pimox_pipeline() { return 0 fi - if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm config '${template_vmid}' | grep -q '^template: 1$'"; then + if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${template_vmid}' | grep -q '^template: 1$'"; then echo "Template VM ${template_vmid} is not available as a Pimox template after provisioning." >&2 exit 1 fi - pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo qm set '${template_vmid}' --agent enabled=1" + pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${template_vmid}' --agent enabled=1" mkdir -p "${REPO_ROOT}/.lab" : >"${spec_file}" @@ -369,7 +395,8 @@ run_pimox_pipeline() { "${worker_user}" \ "${worker_key_path}" \ "${ip_prefix}" \ - "${timeout_seconds}" + "${timeout_seconds}" \ + "${qm_bin}" done write_cluster_worker_var_file "${spec_file}" "${var_file}"