#!/usr/bin/env bash set -euo pipefail REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" BUILDX_CONFIG="/tmp/buildx-config.toml" KUBECONFIG_PATH="${KUBECONFIG_PATH:-${TF_VAR_kubeconfig_path:-/home/jv/.kube/config}}" trap 'rm -f "${BUILDX_CONFIG}"' EXIT require_debian_server() { local command_name="$1" local os_id="" if [[ "$(uname -s)" != "Linux" ]]; then echo "Refusing to run '${command_name}' from this machine. Run it on the Debian homelab server." >&2 exit 1 fi if [[ -r /etc/os-release ]]; then os_id="$(awk -F= '$1 == "ID" {gsub(/"/, "", $2); print $2; exit}' /etc/os-release)" fi if [[ "${os_id}" != "debian" ]]; then echo "Refusing to run '${command_name}' on ${os_id:-unknown OS}. Run it on the Debian homelab server." >&2 exit 1 fi } run_tofu_stack() { local stack="$1" local -a apply_args=(-auto-approve) if [[ "${stack}" == "bootstrap/cluster" && -n "${LAB_CLUSTER_VAR_FILE:-}" ]]; then apply_args+=("-var-file=${LAB_CLUSTER_VAR_FILE}") fi tofu -chdir="${REPO_ROOT}/${stack}" init tofu -chdir="${REPO_ROOT}/${stack}" apply "${apply_args[@]}" } truthy() { case "${1,,}" in 1 | true | yes | on) return 0 ;; *) return 1 ;; esac } disabled_value() { case "${1,,}" in 0 | false | no | off | disabled) return 0 ;; *) return 1 ;; esac } worker_index_is_skipped() { local index="$1" local skip_indexes="$2" local skip_index skip_indexes="${skip_indexes//,/ }" for skip_index in ${skip_indexes}; do [[ -z "${skip_index}" ]] && continue if ! [[ "${skip_index}" =~ ^[0-9]+$ ]]; then echo "LAB_PIMOX_SKIP_WORKER_INDEXES must contain only comma or space separated positive integers." >&2 exit 1 fi if ((skip_index == index)); then return 0 fi done return 1 } ensure_python3() { if command -v python3 >/dev/null 2>&1; then return 0 fi sudo apt-get update sudo apt-get install -y --no-install-recommends python3 } detect_route_interface() { local target="$1" ip route get "${target}" 2>/dev/null | awk ' { for (i = 1; i <= NF; i++) { if ($i == "dev") { print $(i + 1) exit } } } ' } pimox_ssh() { local host="$1" local user="$2" local key_path="$3" shift 3 ssh -i "${key_path}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new "${user}@${host}" "$@" } pimox_guest_ipv4() { local guest_json local host="$1" local user="$2" local key_path="$3" local vmid="$4" local ip_prefix="$5" local qm_bin="${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}" guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)" if [[ -z "${guest_json}" ]]; then return 1 fi GUEST_JSON="${guest_json}" python3 - "${ip_prefix}" <<'PY' import json import os import sys prefix = sys.argv[1] try: interfaces = json.loads(os.environ.get("GUEST_JSON", "")) except Exception: sys.exit(1) for iface in interfaces or []: for address in iface.get("ip-addresses") or []: if address.get("ip-address-type") != "ipv4": continue ip = address.get("ip-address", "") if not ip or ip.startswith(("127.", "169.254.")): continue if prefix and not ip.startswith(prefix): continue print(ip) sys.exit(0) sys.exit(1) PY } wait_for_pimox_guest_ssh() { local host="$1" local user="$2" local key_path="$3" local vmid="$4" local guest_user="$5" local guest_key_path="$6" local ip_prefix="$7" local timeout_seconds="$8" local deadline local guest_ip deadline=$((SECONDS + timeout_seconds)) while ((SECONDS < deadline)); do guest_ip="$(pimox_guest_ipv4 "${host}" "${user}" "${key_path}" "${vmid}" "${ip_prefix}" || true)" if [[ -n "${guest_ip}" ]] && ssh -i "${guest_key_path}" -o BatchMode=yes -o ConnectTimeout=8 -o StrictHostKeyChecking=accept-new "${guest_user}@${guest_ip}" true >/dev/null 2>&1; then printf '%s\n' "${guest_ip}" return 0 fi sleep 10 done return 1 } pimox_generated_mac() { local vmid="$1" printf '02:68:10:%02x:%02x:%02x\n' \ $(((vmid >> 16) & 255)) \ $(((vmid >> 8) & 255)) \ $((vmid & 255)) } ensure_pimox_worker_node() { local index="$1" local spec_file="$2" local pimox_host="$3" local pimox_user="$4" local pimox_key="$5" local template_vmid="$6" local bridge="$7" local worker_base_vmid="$8" local worker_name_prefix="$9" local worker_node_prefix="${10}" local worker_key_prefix="${11}" local worker_cores="${12}" local worker_memory="${13}" local worker_user="${14}" local worker_key_path="${15}" local ip_prefix="${16}" local timeout_seconds="${17}" local qm_bin="${18}" local worker_storage="${19}" local padded local vmid local worker_key local worker_name local node_name local mac local guest_ip printf -v padded '%02d' "${index}" vmid=$((worker_base_vmid + index - 1)) worker_key="${worker_key_prefix}${padded}" worker_name="${worker_name_prefix}-${padded}" node_name="${worker_node_prefix}-${padded}" mac="$(pimox_generated_mac "${vmid}")" if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' status '${vmid}' >/dev/null 2>&1"; then if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${vmid}' | grep -q '^template: 1$'"; then echo "VM ${vmid} exists as a template; refusing to reuse it as worker ${worker_name}." >&2 exit 1 fi pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${vmid}' --agent enabled=1 if sudo '${qm_bin}' status '${vmid}' | grep -q 'status: stopped'; then sudo '${qm_bin}' start '${vmid}'; fi" else pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu if ! ip link show '${bridge}' >/dev/null 2>&1; then echo 'Pimox bridge ${bridge} does not exist. Refusing to change Orange Pi networking.' >&2 exit 1 fi pvesm_cmd=\"\$(command -v pvesm 2>/dev/null || true)\" if [ -z \"\$pvesm_cmd\" ] && [ -x /usr/sbin/pvesm ]; then pvesm_cmd=/usr/sbin/pvesm fi if [ -z \"\$pvesm_cmd\" ]; then echo 'pvesm was not found; cannot validate Pimox worker storage ${worker_storage}' >&2 exit 1 fi if ! sudo \"\$pvesm_cmd\" status | awk -v storage='${worker_storage}' 'NR > 1 && \$1 == storage { found = 1 } END { exit found ? 0 : 1 }'; then echo 'Pimox worker storage ${worker_storage} was not found. Refusing to create worker ${worker_name}.' >&2 exit 1 fi sudo '${qm_bin}' clone '${template_vmid}' '${vmid}' --name '${worker_name}' --full 1 --storage '${worker_storage}' sudo '${qm_bin}' set '${vmid}' --agent enabled=1 sudo '${qm_bin}' set '${vmid}' --cores '${worker_cores}' --memory '${worker_memory}' sudo '${qm_bin}' set '${vmid}' --net0 'virtio=${mac},bridge=${bridge}' sudo '${qm_bin}' set '${vmid}' --boot 'order=scsi0;net0' sudo '${qm_bin}' set '${vmid}' --onboot 1 sudo '${qm_bin}' start '${vmid}'" fi if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}")"; then echo "Timed out waiting for worker VM ${vmid} (${worker_name}) to report a reachable guest IP." >&2 exit 1 fi printf '%s\t%s\t%s\t%s\t%s\n' "${worker_key}" "${guest_ip}" "${worker_user}" "${node_name}" "${worker_key_path}" >>"${spec_file}" } write_cluster_worker_var_file() { local spec_file="$1" local var_file="$2" LAB_INCLUDE_RASPBERRY_WORKER="${LAB_INCLUDE_RASPBERRY_WORKER:-true}" \ LAB_RASPBERRY_HOST="${LAB_RASPBERRY_HOST:-192.168.100.89}" \ LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \ LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \ LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \ LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\"}}" \ LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\"}}" \ python3 - "${spec_file}" "${var_file}" <<'PY' import json import os import sys spec_file, var_file = sys.argv[1:3] nodes = {} node_labels = {} try: raspberry_labels = json.loads(os.environ["LAB_RASPBERRY_NODE_LABELS_JSON"]) pimox_labels = json.loads(os.environ["LAB_PIMOX_WORKER_NODE_LABELS_JSON"]) except json.JSONDecodeError as exc: raise SystemExit(f"Invalid node label JSON: {exc}") from exc if os.environ["LAB_INCLUDE_RASPBERRY_WORKER"].lower() not in {"0", "false", "no", "off", "disabled"}: nodes["raspberrypi"] = { "host": os.environ["LAB_RASPBERRY_HOST"], "user": os.environ["LAB_RASPBERRY_USER"], "node_name": os.environ["LAB_RASPBERRY_NODE_NAME"], "ssh_key_path": os.environ["LAB_RASPBERRY_SSH_KEY_PATH"], } node_labels["raspberrypi"] = raspberry_labels with open(spec_file, encoding="utf-8") as handle: for line in handle: line = line.rstrip("\n") if not line: continue key, host, user, node_name, ssh_key_path = line.split("\t") nodes[key] = { "host": host, "user": user, "node_name": node_name, "ssh_key_path": ssh_key_path, } node_labels[key] = pimox_labels with open(var_file, "w", encoding="utf-8") as handle: json.dump({"worker_nodes": nodes, "worker_node_labels": node_labels}, handle, indent=2) handle.write("\n") PY } run_pimox_pipeline() { local mode="${LAB_PIMOX_PIPELINE:-auto}" local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}" local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}" local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}" local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}" local bridge="${LAB_PIMOX_BRIDGE:-${TF_VAR_pimox_template_bridge:-vmbr0}}" local template_vmid="${LAB_PIMOX_TEMPLATE_VMID:-${TF_VAR_pimox_template_vmid:-9000}}" local template_name="${LAB_PIMOX_TEMPLATE_NAME:-${TF_VAR_pimox_template_name:-debian13-arm64-k8s-template}}" local template_replace_existing="${LAB_PIMOX_TEMPLATE_REPLACE_EXISTING:-${TF_VAR_pimox_template_replace_existing:-false}}" local provisioning_interface local worker_count="${LAB_PIMOX_WORKER_COUNT:-1}" local worker_base_vmid="${LAB_PIMOX_WORKER_BASE_VMID:-9010}" local worker_name_prefix="${LAB_PIMOX_WORKER_NAME_PREFIX:-pimox-worker}" local worker_node_prefix="${LAB_PIMOX_WORKER_NODE_PREFIX:-pimox-worker}" local worker_key_prefix="${LAB_PIMOX_WORKER_KEY_PREFIX:-pimox}" local worker_skip_indexes="${LAB_PIMOX_SKIP_WORKER_INDEXES:-1}" local worker_cores="${LAB_PIMOX_WORKER_CORES:-2}" local worker_memory="${LAB_PIMOX_WORKER_MEMORY:-2048}" local worker_storage="${LAB_PIMOX_WORKER_STORAGE:-${TF_VAR_pimox_worker_storage:-nvme_thin_pool}}" local worker_user="${LAB_PIMOX_WORKER_USER:-jv}" local worker_key_path="${LAB_PIMOX_WORKER_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" local ip_prefix="${LAB_PIMOX_GUEST_IP_PREFIX:-192.168.100.}" local timeout_seconds="${LAB_PIMOX_GUEST_TIMEOUT_SECONDS:-3600}" local spec_file="${REPO_ROOT}/.lab/pimox-workers.tsv" local var_file="${REPO_ROOT}/.lab/cluster-workers.auto.tfvars.json" local index local readiness_output local readiness_status if disabled_value "${mode}"; then return 0 fi if [[ "${mode}" == "auto" && -n "${LAB_PIMOX_WORKER_COUNT+x}" ]]; then mode="true" fi if ! [[ "${worker_count}" =~ ^[0-9]+$ ]]; then echo "LAB_PIMOX_WORKER_COUNT must be a non-negative integer." >&2 exit 1 fi if ! [[ "${worker_storage}" =~ ^[A-Za-z0-9_.:-]+$ ]]; then echo "LAB_PIMOX_WORKER_STORAGE must be a valid Pimox storage identifier." >&2 exit 1 fi if [[ "${worker_storage}" == "local" ]]; then echo "LAB_PIMOX_WORKER_STORAGE cannot be local; only the Pimox template VM should live on local storage." >&2 exit 1 fi set +e readiness_output="$(pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu if ! { command -v qm >/dev/null 2>&1 || [ -x '${qm_bin}' ]; }; then echo 'qm was not found in PATH and ${qm_bin} is not executable' exit 1 fi if ! ip link show '${bridge}' >/dev/null 2>&1; then echo 'bridge ${bridge} was not found' exit 1 fi if ! sudo -n true >/dev/null 2>&1; then echo 'passwordless sudo is not available for ${pimox_user}' exit 1 fi" 2>&1)" readiness_status=$? set -e if ((readiness_status != 0)); then if [[ "${mode}" == "auto" ]]; then echo "Skipping Pimox automation because ${pimox_user}@${pimox_host} with bridge ${bridge} is not ready." return 0 fi echo "Pimox automation requested, but ${pimox_user}@${pimox_host} is not ready: ${readiness_output}" >&2 exit 1 fi ensure_python3 provisioning_interface="${TF_VAR_provisioning_interface:-${LAB_PROVISIONING_INTERFACE:-$(detect_route_interface "${pimox_host}")}}" if [[ -z "${provisioning_interface}" ]]; then echo "Could not detect the Debian interface used to reach ${pimox_host}; set LAB_PROVISIONING_INTERFACE." >&2 exit 1 fi export TF_VAR_provisioning_interface="${provisioning_interface}" export TF_VAR_pimox_host="${pimox_host}" export TF_VAR_pimox_user="${pimox_user}" export TF_VAR_pimox_ssh_key_path="${pimox_key}" export TF_VAR_pimox_qm_bin="${qm_bin}" export TF_VAR_pimox_template_bridge="${bridge}" export TF_VAR_pimox_template_vmid="${template_vmid}" export TF_VAR_pimox_template_name="${template_name}" export TF_VAR_pimox_template_replace_existing="${template_replace_existing}" export TF_VAR_pimox_template_builder_enabled="${TF_VAR_pimox_template_builder_enabled:-true}" export TF_VAR_pimox_template_build_ssh_key_path="${TF_VAR_pimox_template_build_ssh_key_path:-${worker_key_path}}" export TF_VAR_pimox_template_build_user="${TF_VAR_pimox_template_build_user:-${worker_user}}" export TF_VAR_pimox_template_guest_ip_prefix="${TF_VAR_pimox_template_guest_ip_prefix:-${ip_prefix}}" export TF_VAR_pimox_template_build_timeout_seconds="${TF_VAR_pimox_template_build_timeout_seconds:-${timeout_seconds}}" echo "Preparing Pimox provisioning and Debian worker template on ${pimox_host} without changing Orange Pi host networking..." run_tofu_stack "bootstrap/provisioning" if ((worker_count == 0)); then return 0 fi if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${template_vmid}' | grep -q '^template: 1$'"; then echo "Template VM ${template_vmid} is not available as a Pimox template after provisioning." >&2 exit 1 fi pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${template_vmid}' --agent enabled=1" echo "Worker VM clones will be created on Pimox storage ${worker_storage}; template VM ${template_vmid} stays on its configured template storage." mkdir -p "${REPO_ROOT}/.lab" : >"${spec_file}" for ((index = 1; index <= worker_count; index++)); do if worker_index_is_skipped "${index}" "${worker_skip_indexes}"; then echo "Skipping Pimox worker index ${index} because LAB_PIMOX_SKIP_WORKER_INDEXES=${worker_skip_indexes}." continue fi ensure_pimox_worker_node \ "${index}" \ "${spec_file}" \ "${pimox_host}" \ "${pimox_user}" \ "${pimox_key}" \ "${template_vmid}" \ "${bridge}" \ "${worker_base_vmid}" \ "${worker_name_prefix}" \ "${worker_node_prefix}" \ "${worker_key_prefix}" \ "${worker_cores}" \ "${worker_memory}" \ "${worker_user}" \ "${worker_key_path}" \ "${ip_prefix}" \ "${timeout_seconds}" \ "${qm_bin}" \ "${worker_storage}" done write_cluster_worker_var_file "${spec_file}" "${var_file}" export LAB_CLUSTER_VAR_FILE="${var_file}" } run_openwrt_pipeline() { local mode="${LAB_OPENWRT_VM:-${LAB_OPENWRT_PIPELINE:-false}}" local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}" local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}" local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}" local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}" local vmid="${LAB_OPENWRT_VMID:-9050}" local vm_name="${LAB_OPENWRT_NAME:-openwrt-firewall}" local storage="${LAB_OPENWRT_STORAGE:-nvme_thin_pool}" local wan_bridge="${LAB_OPENWRT_WAN_BRIDGE:-vmbr0}" local lan_bridge="${LAB_OPENWRT_LAN_BRIDGE:-vmbr1}" local cores="${LAB_OPENWRT_CORES:-2}" local memory="${LAB_OPENWRT_MEMORY:-512}" local version="${LAB_OPENWRT_VERSION:-24.10.6}" local image_url="${LAB_OPENWRT_IMAGE_URL:-}" local lan_ip="${LAB_OPENWRT_LAN_IP:-192.168.50.1}" local lan_netmask="${LAB_OPENWRT_LAN_NETMASK:-255.255.255.0}" local lan_dhcp_enabled="${LAB_OPENWRT_LAN_DHCP_ENABLED:-false}" local start_vm="${LAB_OPENWRT_START:-true}" local root_key_path="${LAB_OPENWRT_ROOT_SSH_PUBLIC_KEY_PATH:-${pimox_key}.pub}" local root_key_b64="" local lan_dhcp_ignore="1" local start_vm_flag="false" if disabled_value "${mode}"; then return 0 fi if ! truthy "${mode}"; then echo "LAB_OPENWRT_VM must be true or false." >&2 exit 1 fi if [[ -z "${image_url}" ]]; then image_url="https://downloads.openwrt.org/releases/${version}/targets/armsr/armv8/openwrt-${version}-armsr-armv8-generic-ext4-combined-efi.img.gz" fi if ! [[ "${vmid}" =~ ^[0-9]+$ ]]; then echo "LAB_OPENWRT_VMID must be a numeric Pimox VMID." >&2 exit 1 fi for value_name in storage wan_bridge lan_bridge vm_name; do local value="${!value_name}" if ! [[ "${value}" =~ ^[A-Za-z0-9_.:-]+$ ]]; then echo "LAB_OPENWRT_${value_name^^} contains unsupported characters." >&2 exit 1 fi done if [[ "${storage}" == "local" ]]; then echo "LAB_OPENWRT_STORAGE cannot be local; reserve local storage for the Pimox Debian template." >&2 exit 1 fi if ! [[ "${lan_ip}" =~ ^[0-9.]+$ && "${lan_netmask}" =~ ^[0-9.]+$ ]]; then echo "LAB_OPENWRT_LAN_IP and LAB_OPENWRT_LAN_NETMASK must be IPv4-style values." >&2 exit 1 fi if truthy "${lan_dhcp_enabled}"; then lan_dhcp_ignore="0" fi if truthy "${start_vm}"; then start_vm_flag="true" fi if [[ -r "${root_key_path}" ]]; then root_key_b64="$(base64 <"${root_key_path}" | tr -d '\n')" fi echo "Preparing OpenWrt firewall VM ${vmid} on ${pimox_host}; validating ${wan_bridge}, ${lan_bridge}, and ${storage} without changing Orange Pi networking..." pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "bash -s" <&2 exit 1 fi pvesm_cmd="\$(command -v pvesm 2>/dev/null || true)" if [ -z "\$pvesm_cmd" ] && [ -x /usr/sbin/pvesm ]; then pvesm_cmd=/usr/sbin/pvesm fi if [ -z "\$pvesm_cmd" ]; then echo "pvesm was not found; cannot validate Pimox storage \$storage" >&2 exit 1 fi if ! sudo -n true >/dev/null 2>&1; then echo "passwordless sudo is required for OpenWrt VM automation" >&2 exit 1 fi if ! ip link show "\$wan_bridge" >/dev/null 2>&1; then echo "WAN bridge \$wan_bridge does not exist. Refusing to change Orange Pi networking." >&2 exit 1 fi if ! ip link show "\$lan_bridge" >/dev/null 2>&1; then echo "LAN bridge \$lan_bridge does not exist. Create it manually before enabling OpenWrt automation." >&2 exit 1 fi if ! sudo "\$pvesm_cmd" status | awk -v storage="\$storage" 'NR > 1 && \$1 == storage { found = 1 } END { exit found ? 0 : 1 }'; then echo "Pimox storage \$storage was not found." >&2 exit 1 fi if sudo "\$qm_cmd" status "\$vmid" >/dev/null 2>&1; then if sudo "\$qm_cmd" config "\$vmid" | grep -q '^template: 1$'; then echo "VM \$vmid exists as a template; refusing to reuse it for OpenWrt." >&2 exit 1 fi sudo "\$qm_cmd" set "\$vmid" \\ --net0 "virtio,bridge=\$wan_bridge" \\ --net1 "virtio,bridge=\$lan_bridge" \\ --cores "\$cores" \\ --memory "\$memory" \\ --onboot 1 if [ "\$start_vm" = "true" ] && sudo "\$qm_cmd" status "\$vmid" | grep -q 'status: stopped'; then sudo "\$qm_cmd" start "\$vmid" fi exit 0 fi for required_cmd in curl gzip losetup mount umount awk sed; do if ! command -v "\$required_cmd" >/dev/null 2>&1; then echo "\$required_cmd is required on the Pimox host for OpenWrt image preparation" >&2 exit 1 fi done tmp_dir="\$(mktemp -d /tmp/homelab-openwrt.XXXXXX)" mnt_dir="\$tmp_dir/root" loopdev="" cleanup() { if mountpoint -q "\$mnt_dir" 2>/dev/null; then sudo umount "\$mnt_dir" || sudo umount -l "\$mnt_dir" || true fi if [ -n "\$loopdev" ]; then sudo losetup -d "\$loopdev" >/dev/null 2>&1 || true fi rm -rf "\$tmp_dir" } trap cleanup EXIT mkdir -p "\$mnt_dir" curl -fsSL "\$image_url" -o "\$tmp_dir/openwrt.img.gz" gzip -dc "\$tmp_dir/openwrt.img.gz" >"\$tmp_dir/openwrt.img" loopdev="\$(sudo losetup --find --partscan --show "\$tmp_dir/openwrt.img")" root_part="\${loopdev}p2" if [ ! -b "\$root_part" ] && echo "\$loopdev" | grep -q 'loop[0-9]\$'; then root_part="\${loopdev}p2" fi if [ ! -b "\$root_part" ]; then echo "Could not find OpenWrt root partition \$root_part after attaching image." >&2 exit 1 fi sudo mount "\$root_part" "\$mnt_dir" sudo mkdir -p "\$mnt_dir/etc/config" "\$mnt_dir/etc/dropbear" "\$mnt_dir/root/.ssh" cat >"\$tmp_dir/network" <"\$tmp_dir/dhcp" <"\$tmp_dir/firewall" <<'FIREWALL' config defaults option input 'REJECT' option output 'ACCEPT' option forward 'REJECT' option synflood_protect '1' config zone option name 'lan' list network 'lan' option input 'ACCEPT' option output 'ACCEPT' option forward 'ACCEPT' config zone option name 'wan' list network 'wan' option input 'REJECT' option output 'ACCEPT' option forward 'REJECT' option masq '1' option mtu_fix '1' config forwarding option src 'lan' option dest 'wan' config rule option name 'Allow-DHCP-Renew' option src 'wan' option proto 'udp' option dest_port '68' option target 'ACCEPT' option family 'ipv4' config rule option name 'Allow-Ping' option src 'wan' option proto 'icmp' option icmp_type 'echo-request' option family 'ipv4' option target 'ACCEPT' FIREWALL cat >"\$tmp_dir/system" <"\$tmp_dir/authorized_keys" sudo cp "\$tmp_dir/authorized_keys" "\$mnt_dir/etc/dropbear/authorized_keys" sudo cp "\$tmp_dir/authorized_keys" "\$mnt_dir/root/.ssh/authorized_keys" sudo chmod 0600 "\$mnt_dir/etc/dropbear/authorized_keys" "\$mnt_dir/root/.ssh/authorized_keys" fi sync sudo umount "\$mnt_dir" sudo losetup -d "\$loopdev" loopdev="" sudo "\$qm_cmd" create "\$vmid" \\ --name "\$vm_name" \\ --bios ovmf \\ --cores "\$cores" \\ --memory "\$memory" \\ --net0 "virtio,bridge=\$wan_bridge" \\ --net1 "virtio,bridge=\$lan_bridge" \\ --numa 0 \\ --ostype l26 \\ --scsihw virtio-scsi-pci \\ --sockets 1 \\ --vga virtio \\ --onboot 1 sudo "\$qm_cmd" set "\$vmid" --efidisk0 "\$storage:1,efitype=4m,pre-enrolled-keys=0" sudo "\$qm_cmd" importdisk "\$vmid" "\$tmp_dir/openwrt.img" "\$storage" --format raw >/dev/null disk_volume="\$(sudo "\$qm_cmd" config "\$vmid" | awk -F': ' '/^unused[0-9]+:/ { print \$2; exit }')" if [ -z "\$disk_volume" ]; then echo "Could not find imported OpenWrt disk volume for VM \$vmid" >&2 exit 1 fi sudo "\$qm_cmd" set "\$vmid" --scsi0 "\$disk_volume" sudo "\$qm_cmd" set "\$vmid" --boot "order=scsi0" if [ "\$start_vm" = "true" ]; then sudo "\$qm_cmd" start "\$vmid" fi EOF } cleanup_calico_links() { ip link show | awk -F: '/^[0-9]+: cali/ {print $2}' | cut -d@ -f1 | xargs -r -n1 sudo ip link delete 2>/dev/null || true sudo ip link delete vxlan.calico 2>/dev/null || true sudo ip link delete tunl0 2>/dev/null || true sudo ip link delete cni0 2>/dev/null || true sudo ip link delete kube-ipvs0 2>/dev/null || true ip netns list | awk '/^(cni-|calico)/ {print $1}' | xargs -r -n1 sudo ip netns delete 2>/dev/null || true } cleanup_iptables() { sudo iptables -F || true sudo iptables -X || true sudo iptables -t nat -F || true sudo iptables -t nat -X || true sudo iptables -t mangle -F || true sudo iptables -t mangle -X || true sudo iptables -t raw -F || true sudo iptables -t raw -X || true if command -v ipvsadm >/dev/null 2>&1; then sudo ipvsadm --clear || true fi } cleanup_calico_runtime_files() { local path for path in /run/calico /var/run/calico; do if sudo test -e "${path}"; then sudo find "${path}" -path '*/cgroup*' -prune -o -mindepth 1 -exec rm -rf -- {} + 2>/dev/null || true sudo rmdir "${path}" 2>/dev/null || true fi done } restore_node_dns() { sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf if sudo test -e /etc/resolv.conf.homelab-k8s-backup; then sudo rm -f /etc/resolv.conf sudo mv /etc/resolv.conf.homelab-k8s-backup /etc/resolv.conf fi sudo systemctl restart systemd-resolved 2>/dev/null || true } cleanup_mounts() { if command -v findmnt >/dev/null 2>&1; then local mount_root while IFS= read -r mountpoint; do sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true done < <( for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true done | sort -ru ) fi while IFS= read -r mountpoint; do sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true done < <(find /var/lib/kubelet/pods -mindepth 2 -maxdepth 5 -type d 2>/dev/null || true) sudo umount -f /var/lib/containerd/srun/* 2>/dev/null || sudo umount -l /var/lib/containerd/srun/* 2>/dev/null || true } cleanup_node() { sudo kubeadm reset --force || true sudo systemctl stop kubelet 2>/dev/null || true sudo systemctl stop containerd 2>/dev/null || true sudo killall containerd-shim-runc-v2 2>/dev/null || true cleanup_mounts sudo rm -rf \ /etc/kubernetes/ \ /var/lib/etcd/ \ /var/lib/kubelet/ \ /var/lib/cni/ \ /etc/cni/net.d \ /run/flannel \ /var/lib/calico \ /var/log/calico \ /var/lib/containerd/* \ /run/containerd/* \ /etc/containerd/certs.d \ /etc/containerd/config.toml cleanup_calico_runtime_files sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam cleanup_iptables cleanup_calico_links restore_node_dns sudo mkdir -p /etc/containerd/certs.d sudo systemctl reset-failed kubelet containerd 2>/dev/null || true sudo systemctl start containerd 2>/dev/null || true } website_registry_endpoint() { local image image="$(awk '$1 == "image:" && $2 ~ /php-website/ {print $2; exit}' "${REPO_ROOT}/apps/website/web-app.yaml")" if [[ -z "${image}" || "${image}" != */* ]]; then echo "Could not determine website registry endpoint from apps/website/web-app.yaml" >&2 exit 1 fi printf '%s\n' "${image%%/*}" } demos_registry_endpoint() { local image image="$(awk '$1 == "image:" && $2 ~ /demos-static/ {print $2; exit}' "${REPO_ROOT}/apps/demos-static/web-app.yaml")" if [[ -z "${image}" || "${image}" != */* ]]; then echo "Could not determine demos registry endpoint from apps/demos-static/web-app.yaml" >&2 exit 1 fi printf '%s\n' "${image%%/*}" } website_source_hash() { ( cd "${REPO_ROOT}" find apps/website -type f -print0 | sort -z | xargs -0 sha256sum | sha256sum | awk '{print $1}' ) } demos_source_hash() { ( cd "${REPO_ROOT}" find apps/demos-static -type f -print0 | sort -z | xargs -0 sha256sum | sha256sum | awk '{print $1}' ) } registry_image_exists() { local registry_endpoint="$1" local repository="$2" local tag="$3" local accept_header if ! command -v curl >/dev/null 2>&1; then return 1 fi accept_header="application/vnd.oci.image.index.v1+json, application/vnd.oci.image.manifest.v1+json, application/vnd.docker.distribution.manifest.list.v2+json, application/vnd.docker.distribution.manifest.v2+json" curl -fsS \ -H "Accept: ${accept_header}" \ "http://${registry_endpoint}/v2/${repository}/manifests/${tag}" >/dev/null } image_state_value() { local state_file="$1" local key="$2" awk -F= -v key="${key}" '$1 == key {print substr($0, index($0, "=") + 1); exit}' "${state_file}" 2>/dev/null || true } website_image_is_current() { local state_file="$1" local source_hash="$2" local platforms="$3" local image_ref="$4" local registry_endpoint="$5" local saved_hash local saved_platforms local saved_image [[ -f "${state_file}" ]] || return 1 saved_hash="$(image_state_value "${state_file}" source_hash)" saved_platforms="$(image_state_value "${state_file}" platforms)" saved_image="$(image_state_value "${state_file}" image)" [[ "${saved_hash}" == "${source_hash}" ]] || return 1 [[ "${saved_platforms}" == "${platforms}" ]] || return 1 [[ "${saved_image}" == "${image_ref}" ]] || return 1 registry_image_exists "${registry_endpoint}" php-website latest } demos_image_is_current() { local state_file="$1" local source_hash="$2" local platforms="$3" local image_ref="$4" local registry_endpoint="$5" local saved_hash local saved_platforms local saved_image [[ -f "${state_file}" ]] || return 1 saved_hash="$(image_state_value "${state_file}" source_hash)" saved_platforms="$(image_state_value "${state_file}" platforms)" saved_image="$(image_state_value "${state_file}" image)" [[ "${saved_hash}" == "${source_hash}" ]] || return 1 [[ "${saved_platforms}" == "${platforms}" ]] || return 1 [[ "${saved_image}" == "${image_ref}" ]] || return 1 registry_image_exists "${registry_endpoint}" demos-static latest } write_website_image_state() { local state_file="$1" local source_hash="$2" local platforms="$3" local image_ref="$4" mkdir -p "$(dirname "${state_file}")" { printf 'source_hash=%s\n' "${source_hash}" printf 'platforms=%s\n' "${platforms}" printf 'image=%s\n' "${image_ref}" } > "${state_file}" } write_demos_image_state() { local state_file="$1" local source_hash="$2" local platforms="$3" local image_ref="$4" mkdir -p "$(dirname "${state_file}")" { printf 'source_hash=%s\n' "${source_hash}" printf 'platforms=%s\n' "${platforms}" printf 'image=%s\n' "${image_ref}" } > "${state_file}" } path_available_mb() { local path="$1" while [[ ! -e "${path}" && "${path}" != "/" ]]; do path="$(dirname "${path}")" done df -Pm "${path}" | awk 'NR == 2 {print $4}' } docker_root_dir() { docker info --format '{{.DockerRootDir}}' 2>/dev/null || printf '/var/lib/docker\n' } prune_unused_docker_build_data() { docker buildx rm lab-builder 2>/dev/null || true docker rm -f buildx_buildkit_lab-builder0 2>/dev/null || true docker builder prune -af 2>/dev/null || true docker system prune -af 2>/dev/null || true } ensure_docker_build_space() { local docker_root local free_mb local min_free_mb min_free_mb="${DOCKER_BUILD_MIN_FREE_MB:-4096}" docker_root="$(docker_root_dir)" free_mb="$(path_available_mb "${docker_root}")" if (( free_mb >= min_free_mb )); then return 0 fi echo "Docker data root ${docker_root} has ${free_mb}MiB free; pruning unused Docker build data..." prune_unused_docker_build_data free_mb="$(path_available_mb "${docker_root}")" if (( free_mb < min_free_mb )); then echo "Docker data root ${docker_root} still has only ${free_mb}MiB free after cleanup." >&2 echo "Free space there or move Docker's data-root to a larger filesystem such as /home before building." >&2 echo "Override the threshold with DOCKER_BUILD_MIN_FREE_MB if this host can build with less space." >&2 exit 1 fi } prepare_buildx_builder() { local registry_endpoint="$1" docker run --rm --privileged multiarch/qemu-user-static --reset -p yes cat < "${BUILDX_CONFIG}" [registry."${registry_endpoint}"] http = true insecure = true [registry."127.0.0.1:30500"] http = true insecure = true [registry."localhost:30500"] http = true insecure = true EOF docker buildx rm lab-builder 2>/dev/null || true docker buildx create --name lab-builder --driver docker-container --driver-opt network=host --config "${BUILDX_CONFIG}" --use docker buildx inspect --bootstrap } dump_argocd_debug() { local app="$1" kubectl --kubeconfig "${KUBECONFIG}" -n argocd get application "${app}" -o yaml || true kubectl --kubeconfig "${KUBECONFIG}" -n argocd describe application "${app}" || true kubectl --kubeconfig "${KUBECONFIG}" -n argocd get pods -o wide || true kubectl --kubeconfig "${KUBECONFIG}" -n argocd logs deployment/argocd-repo-server --tail=120 || true kubectl --kubeconfig "${KUBECONFIG}" -n argocd logs statefulset/argocd-application-controller --tail=120 || true } dump_namespace_debug() { local namespace="$1" kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get all -o wide || true kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get pvc -o wide || true kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" describe pods || true kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true } wait_for_namespace() { local namespace="$1" local app="$2" local timeout_seconds="$3" local elapsed=0 until kubectl --kubeconfig "${KUBECONFIG}" get namespace "${namespace}" >/dev/null 2>&1; do if ((elapsed >= timeout_seconds)); then echo "Timed out waiting for namespace ${namespace} from Argo CD app ${app}" >&2 dump_argocd_debug "${app}" exit 1 fi sleep 5 elapsed=$((elapsed + 5)) done } wait_for_namespaced_resource() { local namespace="$1" local kind="$2" local name="$3" local app="$4" local timeout_seconds="$5" local elapsed=0 until kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get "${kind}/${name}" >/dev/null 2>&1; do if ((elapsed >= timeout_seconds)); then echo "Timed out waiting for ${kind}/${name} in namespace ${namespace} from Argo CD app ${app}" >&2 dump_argocd_debug "${app}" kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true exit 1 fi sleep 5 elapsed=$((elapsed + 5)) done } wait_for_deployment_ready() { local namespace="$1" local deployment="$2" local app="$3" local timeout_seconds="$4" local desired_replicas local ready_replicas local elapsed=0 desired_replicas="$(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)" desired_replicas="${desired_replicas:-1}" until ready_replicas="$(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null)"; \ (( ${ready_replicas:-0} >= desired_replicas )); do if ((elapsed >= timeout_seconds)); then echo "Timed out waiting for deployment/${deployment} in namespace ${namespace} to have ${desired_replicas} ready replicas" >&2 dump_argocd_debug "${app}" dump_namespace_debug "${namespace}" exit 1 fi sleep 5 elapsed=$((elapsed + 5)) done } apply_gitea_bootstrap_manifests() { kubectl --kubeconfig "${KUBECONFIG}" apply -f "${REPO_ROOT}/apps/gitea/namespace.yaml" kubectl --kubeconfig "${KUBECONFIG}" apply -f "${REPO_ROOT}/apps/gitea/storage.yaml" kubectl --kubeconfig "${KUBECONFIG}" apply -f "${REPO_ROOT}/apps/gitea/service.yaml" kubectl --kubeconfig "${KUBECONFIG}" apply -f "${REPO_ROOT}/apps/gitea/deployment.yaml" wait_for_namespace gitea-system gitea 300 wait_for_namespaced_resource gitea-system deployment gitea gitea 300 wait_for_deployment_ready gitea-system gitea gitea 300 } install_gitea_backup_timer() { local backup_script="/usr/local/sbin/homelab-gitea-backup.sh" sudo tee "${backup_script}" >/dev/null </dev/null 2>&1; then echo "kubectl is required for Gitea backups." >&2 exit 1 fi pod="\$(kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" get pods \ -l "\${GITEA_SELECTOR}" \ --field-selector=status.phase=Running \ -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)" if [[ -z "\${pod}" ]]; then echo "Skipping Gitea backup: no running Gitea pod found." exit 0 fi timestamp="\$(date -u +%Y%m%dT%H%M%SZ)" tmp_archive="\$(mktemp "/tmp/gitea-\${timestamp}.XXXXXX.zip")" backup_archive="\${GITEA_BACKUP_DIR}/gitea-\${timestamp}.zip" cleanup() { rm -f "\${tmp_archive}" kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- rm -f "\${REMOTE_ARCHIVE}" >/dev/null 2>&1 || true } trap cleanup EXIT kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- rm -f "\${REMOTE_ARCHIVE}" >/dev/null 2>&1 || true kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- \ sh -c 'mkdir -p /data/git/repositories && chown git:git /data/git /data/git/repositories' kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- \ su-exec git gitea dump -c /data/gitea/conf/app.ini --file "\${REMOTE_ARCHIVE}" kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" cp -c "\${GITEA_CONTAINER}" \ "\${GITEA_NAMESPACE}/\${pod}:\${REMOTE_ARCHIVE}" "\${tmp_archive}" sudo mkdir -p "\${GITEA_BACKUP_DIR}" sudo install -m 0640 -o root -g root "\${tmp_archive}" "\${backup_archive}" sudo find "\${GITEA_BACKUP_DIR}" -type f -name 'gitea-*.zip' -mtime +"\${GITEA_BACKUP_RETENTION_DAYS}" -delete echo "Created \${backup_archive}" BACKUP_SCRIPT_EOT sudo chmod 0755 "${backup_script}" sudo tee /etc/systemd/system/homelab-gitea-backup.service >/dev/null <<'SERVICE_EOT' [Unit] Description=Back up in-cluster Gitea to Debian host storage After=network-online.target Wants=network-online.target [Service] Type=oneshot ExecStart=/usr/local/sbin/homelab-gitea-backup.sh SERVICE_EOT sudo tee /etc/systemd/system/homelab-gitea-backup.timer >/dev/null <<'TIMER_EOT' [Unit] Description=Run daily Homelab Gitea backups [Timer] OnCalendar=*-*-* 02:35:00 RandomizedDelaySec=20m Persistent=true [Install] WantedBy=timers.target TIMER_EOT sudo systemctl daemon-reload sudo systemctl enable --now homelab-gitea-backup.timer >/dev/null } backup_gitea() { require_debian_server "backup-gitea" export KUBECONFIG="${KUBECONFIG_PATH}" install_gitea_backup_timer sudo /usr/local/sbin/homelab-gitea-backup.sh } install_gitea_runner() { local runner_arch local runner_home="${GITEA_RUNNER_HOME:-/home/jv/.local/share/gitea-runner/my-homelab-configs}" local runner_instance="${GITEA_RUNNER_INSTANCE_URL:-https://lab2025.duckdns.org/git/}" local runner_labels="${GITEA_RUNNER_LABELS:-homelab-debian:host}" local runner_name="${GITEA_RUNNER_NAME:-homelab-debian-my-homelab-configs}" local runner_token="${GITEA_RUNNER_REGISTRATION_TOKEN:-${1:-}}" local runner_user="${GITEA_RUNNER_USER:-jv}" local runner_version="${GITEA_ACT_RUNNER_VERSION:-0.2.11}" local missing_packages=() require_debian_server "install-gitea-runner" case "$(dpkg --print-architecture)" in amd64) runner_arch="linux-amd64" ;; arm64) runner_arch="linux-arm64" ;; *) echo "Unsupported Debian architecture: $(dpkg --print-architecture)" >&2 exit 1 ;; esac for package in ca-certificates curl git nodejs python3; do if ! dpkg-query -W -f='${Status}' "$package" 2>/dev/null | grep -q "install ok installed"; then missing_packages+=("$package") fi done if [[ ${#missing_packages[@]} -gt 0 ]]; then sudo apt-get update sudo apt-get install -y --no-install-recommends "${missing_packages[@]}" fi sudo curl -fsSL \ -o /usr/local/bin/act_runner \ "https://gitea.com/gitea/act_runner/releases/download/v${runner_version}/act_runner-${runner_version}-${runner_arch}" sudo chmod 0755 /usr/local/bin/act_runner sudo chown root:root /usr/local/bin/act_runner sudo -u "${runner_user}" mkdir -p "${runner_home}" if [[ ! -f "${runner_home}/.runner" ]]; then if [[ -z "${runner_token}" ]]; then echo "Set GITEA_RUNNER_REGISTRATION_TOKEN to the repository-level runner token from Gitea." >&2 exit 1 fi sudo -u "${runner_user}" env \ HOME="/home/${runner_user}" \ GITEA_RUNNER_HOME="${runner_home}" \ GITEA_RUNNER_INSTANCE_URL="${runner_instance}" \ GITEA_RUNNER_REGISTRATION_TOKEN="${runner_token}" \ GITEA_RUNNER_NAME="${runner_name}" \ GITEA_RUNNER_LABELS="${runner_labels}" \ bash -lc 'cd "${GITEA_RUNNER_HOME}" && /usr/local/bin/act_runner register --no-interactive --instance "${GITEA_RUNNER_INSTANCE_URL}" --token "${GITEA_RUNNER_REGISTRATION_TOKEN}" --name "${GITEA_RUNNER_NAME}" --labels "${GITEA_RUNNER_LABELS}"' else echo "Existing runner registration found at ${runner_home}/.runner; keeping it." fi sudo tee /etc/systemd/system/homelab-gitea-runner.service >/dev/null </dev/null sudo systemctl status homelab-gitea-runner.service --no-pager -l } recreate_pods_for_selector() { local namespace="$1" local selector="$2" local app="$3" if ! kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" delete pod -l "${selector}" --ignore-not-found --wait=true --timeout=120s; then echo "Failed to recreate pods matching ${selector} in namespace ${namespace}" >&2 dump_argocd_debug "${app}" dump_namespace_debug "${namespace}" exit 1 fi } refresh_argocd_application() { local app="$1" kubectl --kubeconfig "${KUBECONFIG}" patch application "${app}" -n argocd --type merge -p '{"metadata":{"annotations":{"argocd.argoproj.io/refresh":"hard"}}}' >/dev/null } apps() { local buildx_builder_ready=false local demos_image_built=false local demos_image_ref local demos_image_state_file local demos_platforms local demos_registry_endpoint local demos_source_hash local registry_endpoint local website_image_built=false local website_image_ref local website_image_state_file local website_platforms local website_source_hash require_debian_server "apps" registry_endpoint="$(website_registry_endpoint)" demos_registry_endpoint="$(demos_registry_endpoint)" demos_image_ref="${registry_endpoint}/demos-static:latest" demos_image_state_file="${REPO_ROOT}/.lab/demos-static-image.state" demos_platforms="${DEMOS_IMAGE_PLATFORMS:-linux/arm64}" demos_source_hash="$(demos_source_hash)" website_image_ref="${registry_endpoint}/php-website:latest" website_image_state_file="${REPO_ROOT}/.lab/php-website-image.state" website_platforms="${WEBSITE_IMAGE_PLATFORMS:-linux/arm64}" website_source_hash="$(website_source_hash)" export TF_VAR_registry_endpoint="${TF_VAR_registry_endpoint:-${registry_endpoint}}" export TF_VAR_kubeconfig_path="${TF_VAR_kubeconfig_path:-${KUBECONFIG_PATH}}" export KUBECONFIG="${TF_VAR_kubeconfig_path}" if [[ "${TF_VAR_registry_endpoint}" != "${registry_endpoint}" ]]; then echo "TF_VAR_registry_endpoint must match apps/website/web-app.yaml (${registry_endpoint})" >&2 exit 1 fi if [[ "${demos_registry_endpoint}" != "${registry_endpoint}" ]]; then echo "apps/demos-static/web-app.yaml registry endpoint (${demos_registry_endpoint}) must match apps/website/web-app.yaml (${registry_endpoint})" >&2 exit 1 fi echo "Deploying homelab applications..." apply_gitea_bootstrap_manifests run_tofu_stack "bootstrap/apps" refresh_argocd_application container-registry refresh_argocd_application demos-static refresh_argocd_application gitea refresh_argocd_application website-production wait_for_namespace container-registry container-registry 300 wait_for_namespaced_resource container-registry deployment local-registry container-registry 300 wait_for_deployment_ready container-registry local-registry container-registry 300 if website_image_is_current "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}" "${registry_endpoint}"; then echo "Website image ${website_image_ref} is already current (${website_source_hash}); skipping build." else echo "Building website image ${website_image_ref} for ${website_platforms} (${website_source_hash})..." ensure_docker_build_space if [[ "${buildx_builder_ready}" != "true" ]]; then prepare_buildx_builder "${registry_endpoint}" buildx_builder_ready=true fi docker buildx build \ --network host \ --platform "${website_platforms}" \ --provenance=false \ --sbom=false \ --label "dev.homelab.website.source-hash=${website_source_hash}" \ -t "${website_image_ref}" \ -f "${REPO_ROOT}/apps/website/Dockerfile" \ "${REPO_ROOT}/apps/website/" \ --push website_image_built=true fi if demos_image_is_current "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}" "${registry_endpoint}"; then echo "Demos image ${demos_image_ref} is already current (${demos_source_hash}); skipping build." else echo "Building demos image ${demos_image_ref} for ${demos_platforms} (${demos_source_hash})..." ensure_docker_build_space if [[ "${buildx_builder_ready}" != "true" ]]; then prepare_buildx_builder "${registry_endpoint}" buildx_builder_ready=true fi docker buildx build \ --network host \ --platform "${demos_platforms}" \ --provenance=false \ --sbom=false \ --label "dev.homelab.demos.source-hash=${demos_source_hash}" \ -t "${demos_image_ref}" \ -f "${REPO_ROOT}/apps/demos-static/Dockerfile" \ "${REPO_ROOT}/apps/demos-static/" \ --push demos_image_built=true fi refresh_argocd_application website-production wait_for_namespace website-production website-production 300 wait_for_namespaced_resource website-production deployment php-website-deployment website-production 300 if [[ "${website_image_built}" == "true" ]]; then recreate_pods_for_selector website-production app=php-website website-production else echo "Skipping website pod restart because the image did not change." fi wait_for_deployment_ready website-production php-website-deployment website-production 300 if [[ "${website_image_built}" == "true" ]]; then write_website_image_state "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}" fi refresh_argocd_application demos-static wait_for_namespace demos-static demos-static 300 wait_for_namespaced_resource demos-static deployment demos-static demos-static 300 if [[ "${demos_image_built}" == "true" ]]; then recreate_pods_for_selector demos-static app=demos-static demos-static else echo "Skipping demos pod restart because the image did not change." fi wait_for_deployment_ready demos-static demos-static demos-static 300 if [[ "${demos_image_built}" == "true" ]]; then write_demos_image_state "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}" fi echo "Application deployment successfully completed." } up() { require_debian_server "up" echo "Deploying the homelab infrastructure..." run_pimox_pipeline run_openwrt_pipeline run_tofu_stack "bootstrap/cluster" run_tofu_stack "bootstrap/platform" install_gitea_backup_timer apps run_tofu_stack "bootstrap/edge" echo "Deployment successfully completed." } nuke() { local worker_ssh_targets local worker_targets local target require_debian_server "nuke" echo "Brutally nuking the homelab infrastructure..." worker_ssh_targets="${WORKER_SSH_TARGETS-jv@192.168.100.89}" read -r -a worker_targets <<< "${worker_ssh_targets}" echo "--> Terminating local OpenTofu tasks..." killall tofu terraform 2>/dev/null || true echo "--> Eviscerating local Kubernetes components..." cleanup_node sudo rm -f "${KUBECONFIG_PATH}" for target in "${worker_targets[@]}"; do echo "--> Eviscerating remote Kubernetes components (${target})..." if ! ssh -o ConnectTimeout=5 "${target}" "bash -s" <<'EOF' set -euo pipefail cleanup_calico_links() { ip link show | awk -F: '/^[0-9]+: cali/ {print $2}' | cut -d@ -f1 | xargs -r -n1 sudo ip link delete 2>/dev/null || true sudo ip link delete vxlan.calico 2>/dev/null || true sudo ip link delete tunl0 2>/dev/null || true sudo ip link delete cni0 2>/dev/null || true sudo ip link delete kube-ipvs0 2>/dev/null || true ip netns list | awk '/^(cni-|calico)/ {print $1}' | xargs -r -n1 sudo ip netns delete 2>/dev/null || true } cleanup_iptables() { sudo iptables -F || true sudo iptables -X || true sudo iptables -t nat -F || true sudo iptables -t nat -X || true sudo iptables -t mangle -F || true sudo iptables -t mangle -X || true sudo iptables -t raw -F || true sudo iptables -t raw -X || true if command -v ipvsadm >/dev/null 2>&1; then sudo ipvsadm --clear || true fi } cleanup_calico_runtime_files() { local path for path in /run/calico /var/run/calico; do if sudo test -e "${path}"; then sudo find "${path}" -path '*/cgroup*' -prune -o -mindepth 1 -exec rm -rf -- {} + 2>/dev/null || true sudo rmdir "${path}" 2>/dev/null || true fi done } restore_node_dns() { sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf if sudo test -e /etc/resolv.conf.homelab-k8s-backup; then sudo rm -f /etc/resolv.conf sudo mv /etc/resolv.conf.homelab-k8s-backup /etc/resolv.conf fi sudo systemctl restart systemd-resolved 2>/dev/null || true } cleanup_mounts() { if command -v findmnt >/dev/null 2>&1; then local mount_root while IFS= read -r mountpoint; do sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true done < <( for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true done | sort -ru ) fi while IFS= read -r mountpoint; do sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true done < <(find /var/lib/kubelet/pods -mindepth 2 -maxdepth 5 -type d 2>/dev/null || true) sudo umount -f /var/lib/containerd/srun/* 2>/dev/null || sudo umount -l /var/lib/containerd/srun/* 2>/dev/null || true } sudo kubeadm reset --force || true sudo systemctl stop kubelet 2>/dev/null || true sudo systemctl stop containerd 2>/dev/null || true sudo killall containerd-shim-runc-v2 2>/dev/null || true cleanup_mounts sudo rm -rf \ /etc/kubernetes/ \ /var/lib/etcd/ \ /var/lib/kubelet/ \ /var/lib/cni/ \ /etc/cni/net.d \ /run/flannel \ /var/lib/calico \ /var/log/calico \ /var/lib/containerd/* \ /run/containerd/* \ /etc/containerd/certs.d \ /etc/containerd/config.toml cleanup_calico_runtime_files sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam cleanup_iptables cleanup_calico_links restore_node_dns sudo mkdir -p /etc/containerd/certs.d sudo systemctl reset-failed kubelet containerd 2>/dev/null || true sudo systemctl start containerd 2>/dev/null || true EOF then echo "Remote cleanup failed for ${target}; not deleting OpenTofu state." >&2 exit 1 fi done docker buildx rm lab-builder 2>/dev/null || true docker rm -f buildx_buildkit_lab-builder0 2>/dev/null || true rm -f "${BUILDX_CONFIG}" || true echo "--> Deleting OpenTofu tracking state files..." rm -rf "${REPO_ROOT}"/bootstrap/cluster/terraform.tfstate* rm -f "${REPO_ROOT}"/bootstrap/cluster/.terraform.tfstate.lock.info rm -rf "${REPO_ROOT}"/bootstrap/cluster/.terraform/ rm -rf "${REPO_ROOT}"/bootstrap/platform/terraform.tfstate* rm -f "${REPO_ROOT}"/bootstrap/platform/.terraform.tfstate.lock.info rm -rf "${REPO_ROOT}"/bootstrap/platform/.terraform/ rm -rf "${REPO_ROOT}"/bootstrap/apps/terraform.tfstate* rm -f "${REPO_ROOT}"/bootstrap/apps/.terraform.tfstate.lock.info rm -rf "${REPO_ROOT}"/bootstrap/apps/.terraform/ rm -rf "${REPO_ROOT}"/bootstrap/edge/terraform.tfstate* rm -f "${REPO_ROOT}"/bootstrap/edge/.terraform.tfstate.lock.info rm -rf "${REPO_ROOT}"/bootstrap/edge/.terraform/ echo "Destruction complete. Retained data under /var/openebs/local was left intact." } case "${1:-}" in up) up ;; apps) apps ;; backup-gitea) backup_gitea ;; install-gitea-runner) install_gitea_runner "${2:-}" ;; nuke) nuke ;; *) echo "Usage: $0 {up|apps|backup-gitea|install-gitea-runner|nuke}" exit 1 ;; esac