my-homelab-configs/lab.sh

1691 lines
58 KiB
Bash
Executable File

#!/usr/bin/env bash
# Strict mode: abort on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -euo pipefail
# Absolute path of the directory that contains this script; other paths in the
# script are built relative to it.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Location of the buildx TOML configuration written by prepare_buildx_builder.
# NOTE(review): this is a fixed, predictable name under /tmp — consider mktemp.
BUILDX_CONFIG="/tmp/buildx-config.toml"
# Kubeconfig location: KUBECONFIG_PATH wins, then TF_VAR_kubeconfig_path, then
# the hard-coded default.
KUBECONFIG_PATH="${KUBECONFIG_PATH:-${TF_VAR_kubeconfig_path:-/home/jv/.kube/config}}"
# Remove the buildx configuration file on every exit path.
trap 'rm -f "${BUILDX_CONFIG}"' EXIT
# Abort the script unless it is running on a Linux host whose /etc/os-release
# declares ID=debian.
# Arguments: $1 - name of the command being guarded (used in the error text)
# Exits:     1 with a message on stderr when the host is not Debian Linux
require_debian_server() {
  local command_name="$1"
  local os_id=""

  case "$(uname -s)" in
    Linux) ;;
    *)
      echo "Refusing to run '${command_name}' from this machine. Run it on the Debian homelab server." >&2
      exit 1
      ;;
  esac

  # os_id stays empty when os-release is missing or unreadable.
  [[ ! -r /etc/os-release ]] ||
    os_id="$(awk -F= '$1 == "ID" {gsub(/"/, "", $2); print $2; exit}' /etc/os-release)"

  case "${os_id}" in
    debian) ;;
    *)
      echo "Refusing to run '${command_name}' on ${os_id:-unknown OS}. Run it on the Debian homelab server." >&2
      exit 1
      ;;
  esac
}
# Run `tofu init` followed by `tofu apply -auto-approve` for one stack.
# Globals:   REPO_ROOT (read), LAB_CLUSTER_VAR_FILE (read, optional)
# Arguments: $1 - stack directory relative to REPO_ROOT
run_tofu_stack() {
  local stack="$1"
  local chdir_flag="-chdir=${REPO_ROOT}/${stack}"
  local -a apply_args=(-auto-approve)

  # Only the cluster stack receives the generated worker var file.
  if [[ -n "${LAB_CLUSTER_VAR_FILE:-}" && "${stack}" == "bootstrap/cluster" ]]; then
    apply_args+=("-var-file=${LAB_CLUSTER_VAR_FILE}")
  fi

  tofu "${chdir_flag}" init
  tofu "${chdir_flag}" apply "${apply_args[@]}"
}
# Succeed when $1, compared case-insensitively, is one of: 1, true, yes, on.
# Returns: 0 for a truthy value, 1 for anything else
truthy() {
  local normalized="${1,,}"

  if [[ "${normalized}" == "1" || "${normalized}" == "true" ||
    "${normalized}" == "yes" || "${normalized}" == "on" ]]; then
    return 0
  fi
  return 1
}
# Succeed when $1, compared case-insensitively, is one of:
# 0, false, no, off, disabled.
# Returns: 0 for a "disabled" value, 1 for anything else
disabled_value() {
  local normalized="${1,,}"

  if [[ "${normalized}" == "0" || "${normalized}" == "false" ||
    "${normalized}" == "no" || "${normalized}" == "off" ||
    "${normalized}" == "disabled" ]]; then
    return 0
  fi
  return 1
}
# Report whether a worker index appears in a skip list.
# Arguments: $1 - worker index to look up
#            $2 - skip list: integers separated by commas and/or whitespace
# Returns:   0 when the index is listed, 1 otherwise
# Exits:     1 with a message on stderr when the list has a non-numeric entry
worker_index_is_skipped() {
  local index="$1"
  local skip_indexes="$2"
  local skip_index
  local -a skip_list=()

  # Split with read instead of an unquoted expansion so entries such as '*'
  # are validated literally rather than expanded against the current directory.
  # read returns non-zero at end of input, hence the guard.
  IFS=$', \t\n' read -r -d '' -a skip_list <<<"${skip_indexes}" || true
  ((${#skip_list[@]} > 0)) || return 1

  # Force base 10 so a zero-padded index is not parsed as octal.
  if [[ "${index}" =~ ^[0-9]+$ ]]; then
    index=$((10#${index}))
  fi

  for skip_index in "${skip_list[@]}"; do
    # Consecutive commas yield empty fields; ignore them.
    [[ -z "${skip_index}" ]] && continue
    if ! [[ "${skip_index}" =~ ^[0-9]+$ ]]; then
      echo "LAB_PIMOX_SKIP_WORKER_INDEXES must contain only comma or space separated positive integers." >&2
      exit 1
    fi
    # 10# keeps entries like "08" or "09" from failing as invalid octal.
    if ((10#${skip_index} == index)); then
      return 0
    fi
  done
  return 1
}
# Install python3 through apt when no python3 command can be found.
# Does nothing (and returns 0) when python3 is already available.
ensure_python3() {
  if ! command -v python3 >/dev/null 2>&1; then
    sudo apt-get update
    sudo apt-get install -y --no-install-recommends python3
  fi
}
# Print the interface the kernel would use to reach a target address.
# Arguments: $1 - address handed to `ip route get`
# Outputs:   the word following the first "dev" in the route output; nothing
#            when there is no such word
detect_route_interface() {
  local target="$1"

  ip route get "${target}" 2>/dev/null |
    awk '{
      field = 1
      while (field <= NF) {
        if ($field == "dev") {
          print $(field + 1)
          exit
        }
        field++
      }
    }'
}
# Run a command on a remote host over non-interactive, key-based SSH.
# Arguments: $1 - host, $2 - user, $3 - private key path,
#            remaining arguments - command passed through to ssh
pimox_ssh() {
  local host="$1" user="$2" key_path="$3"
  shift 3

  local -a ssh_options=(
    -i "${key_path}"
    -o BatchMode=yes
    -o ConnectTimeout=10
    -o StrictHostKeyChecking=accept-new
  )
  ssh "${ssh_options[@]}" "${user}@${host}" "$@"
}
# Print the first usable IPv4 address reported for a VM by
# `qm guest cmd <vmid> network-get-interfaces`, run over SSH on the Pimox host.
# Globals:   LAB_PIMOX_QM_BIN, TF_VAR_pimox_qm_bin (read) - qm binary path,
#            using the same precedence as the pipeline functions
# Arguments: $1 - Pimox host, $2 - Pimox user, $3 - SSH key path,
#            $4 - VM id, $5 - required address prefix ("" accepts any)
# Outputs:   one IPv4 address on stdout
# Returns:   0 when an address was printed, non-zero otherwise
pimox_guest_ipv4() {
  local guest_json
  local host="$1"
  local user="$2"
  local key_path="$3"
  local vmid="$4"
  local ip_prefix="$5"
  # Honour TF_VAR_pimox_qm_bin as well, so the guest query uses the same qm
  # binary as the rest of the pipeline.
  local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}"

  # The agent may not be answering yet; treat any failure as "no data".
  guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)"
  if [[ -z "${guest_json}" ]]; then
    return 1
  fi

  # The JSON travels through the environment because stdin carries the program.
  GUEST_JSON="${guest_json}" python3 - "${ip_prefix}" <<'PY'
import json
import os
import sys

prefix = sys.argv[1]
try:
    interfaces = json.loads(os.environ.get("GUEST_JSON", ""))
except Exception:
    sys.exit(1)
# Anything other than a list of interface objects is unusable.
if not isinstance(interfaces, list):
    sys.exit(1)
for iface in interfaces:
    if not isinstance(iface, dict):
        continue
    for address in iface.get("ip-addresses") or []:
        if address.get("ip-address-type") != "ipv4":
            continue
        ip = address.get("ip-address", "")
        # Skip loopback and link-local addresses.
        if not ip or ip.startswith(("127.", "169.254.")):
            continue
        if prefix and not ip.startswith(prefix):
            continue
        print(ip)
        sys.exit(0)
sys.exit(1)
PY
}
# Poll until a VM reports an IPv4 address that also accepts SSH logins.
# Arguments: $1 - Pimox host, $2 - Pimox user, $3 - Pimox SSH key path,
#            $4 - VM id, $5 - guest user, $6 - guest SSH key path,
#            $7 - required guest address prefix, $8 - timeout in seconds
# Outputs:   the reachable guest address on stdout
# Returns:   0 on success, 1 when the timeout elapses first
wait_for_pimox_guest_ssh() {
  local host="$1" user="$2" key_path="$3" vmid="$4"
  local guest_user="$5" guest_key_path="$6"
  local ip_prefix="$7" timeout_seconds="$8"
  local -a probe_options=(
    -i "${guest_key_path}"
    -o BatchMode=yes
    -o ConnectTimeout=8
    -o StrictHostKeyChecking=accept-new
  )
  local deadline
  local guest_ip

  deadline=$((SECONDS + timeout_seconds))
  until ((SECONDS >= deadline)); do
    guest_ip="$(pimox_guest_ipv4 "${host}" "${user}" "${key_path}" "${vmid}" "${ip_prefix}" || true)"
    if [[ -n "${guest_ip}" ]]; then
      # An address alone is not enough; the guest must accept a login.
      if ssh "${probe_options[@]}" "${guest_user}@${guest_ip}" true >/dev/null 2>&1; then
        printf '%s\n' "${guest_ip}"
        return 0
      fi
    fi
    sleep 10
  done
  return 1
}
# Derive a MAC address from a VM id: the fixed prefix 02:68:10 followed by the
# low three bytes of the id, printed as two-digit lowercase hex.
# Arguments: $1 - numeric VM id
pimox_generated_mac() {
  local vmid="$1"
  local high_byte=$(((vmid >> 16) & 0xff))
  local mid_byte=$(((vmid >> 8) & 0xff))
  local low_byte=$((vmid & 0xff))

  printf '02:68:10:%02x:%02x:%02x\n' "${high_byte}" "${mid_byte}" "${low_byte}"
}
# Make sure one Pimox worker VM exists and is running, wait until it is
# reachable over SSH, and append its connection details to a TSV spec file.
# Arguments:
#   $1  index               worker index (used for the VM id and name suffix)
#   $2  spec_file           TSV file that receives one line for this worker
#   $3  pimox_host          Pimox host address
#   $4  pimox_user          SSH user on the Pimox host
#   $5  pimox_key           SSH private key for the Pimox host
#   $6  template_vmid       VM id of the template to clone
#   $7  bridge              bridge attached to the worker's net0
#   $8  worker_base_vmid    VM id assigned to worker index 1
#   $9  worker_name_prefix  prefix of the VM name
#   $10 worker_node_prefix  prefix of the node name written to the spec file
#   $11 worker_key_prefix   prefix of the key written to the spec file
#   $12 worker_cores        value passed to `qm set --cores`
#   $13 worker_memory       value passed to `qm set --memory`
#   $14 worker_user         SSH user inside the guest
#   $15 worker_key_path     SSH private key for the guest
#   $16 ip_prefix           required prefix of the guest IPv4 address
#   $17 timeout_seconds     how long to wait for the guest to become reachable
#   $18 qm_bin              path of the qm binary on the Pimox host
#   $19 worker_storage      storage that receives the full clone
# Exits: 1 when the VM id belongs to a template or the guest never becomes
#        reachable; under `set -e` a failing remote command also aborts.
ensure_pimox_worker_node() {
local index="$1"
local spec_file="$2"
local pimox_host="$3"
local pimox_user="$4"
local pimox_key="$5"
local template_vmid="$6"
local bridge="$7"
local worker_base_vmid="$8"
local worker_name_prefix="$9"
local worker_node_prefix="${10}"
local worker_key_prefix="${11}"
local worker_cores="${12}"
local worker_memory="${13}"
local worker_user="${14}"
local worker_key_path="${15}"
local ip_prefix="${16}"
local timeout_seconds="${17}"
local qm_bin="${18}"
local worker_storage="${19}"
local padded
local vmid
local worker_key
local worker_name
local node_name
local mac
local guest_ip
# Derived identifiers: index 1 maps to worker_base_vmid, and names carry a
# two-digit zero-padded suffix.
printf -v padded '%02d' "${index}"
vmid=$((worker_base_vmid + index - 1))
worker_key="${worker_key_prefix}${padded}"
worker_name="${worker_name_prefix}-${padded}"
node_name="${worker_node_prefix}-${padded}"
mac="$(pimox_generated_mac "${vmid}")"
# `qm status` succeeding means a VM with this id already exists.
if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' status '${vmid}' >/dev/null 2>&1"; then
# Never repurpose a template as a worker.
if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${vmid}' | grep -q '^template: 1$'"; then
echo "VM ${vmid} exists as a template; refusing to reuse it as worker ${worker_name}." >&2
exit 1
fi
# Existing VM: enable the guest agent and start the VM if it is stopped.
# NOTE(review): this remote snippet does not use `set -e`, so a failing
# `qm set` does not by itself make the SSH command fail — confirm intent.
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${vmid}' --agent enabled=1
if sudo '${qm_bin}' status '${vmid}' | grep -q 'status: stopped'; then sudo '${qm_bin}' start '${vmid}'; fi"
else
# New VM: validate the bridge and storage on the host, then full-clone the
# template, configure the clone and start it. The remote script runs under
# `set -eu`.
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu
if ! ip link show '${bridge}' >/dev/null 2>&1; then
echo 'Pimox bridge ${bridge} does not exist. Refusing to change Orange Pi networking.' >&2
exit 1
fi
pvesm_cmd=\"\$(command -v pvesm 2>/dev/null || true)\"
if [ -z \"\$pvesm_cmd\" ] && [ -x /usr/sbin/pvesm ]; then
pvesm_cmd=/usr/sbin/pvesm
fi
if [ -z \"\$pvesm_cmd\" ]; then
echo 'pvesm was not found; cannot validate Pimox worker storage ${worker_storage}' >&2
exit 1
fi
if ! sudo \"\$pvesm_cmd\" status | awk -v storage='${worker_storage}' 'NR > 1 && \$1 == storage { found = 1 } END { exit found ? 0 : 1 }'; then
echo 'Pimox worker storage ${worker_storage} was not found. Refusing to create worker ${worker_name}.' >&2
exit 1
fi
sudo '${qm_bin}' clone '${template_vmid}' '${vmid}' --name '${worker_name}' --full 1 --storage '${worker_storage}'
sudo '${qm_bin}' set '${vmid}' --agent enabled=1
sudo '${qm_bin}' set '${vmid}' --cores '${worker_cores}' --memory '${worker_memory}'
sudo '${qm_bin}' set '${vmid}' --net0 'virtio=${mac},bridge=${bridge}'
sudo '${qm_bin}' set '${vmid}' --boot 'order=scsi0;net0'
sudo '${qm_bin}' set '${vmid}' --onboot 1
sudo '${qm_bin}' start '${vmid}'"
fi
# Wait for the guest to report an address that also accepts SSH.
if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}")"; then
echo "Timed out waiting for worker VM ${vmid} (${worker_name}) to report a reachable guest IP." >&2
exit 1
fi
# Spec line columns: key, host, user, node name, SSH key path.
printf '%s\t%s\t%s\t%s\t%s\n' "${worker_key}" "${guest_ip}" "${worker_user}" "${node_name}" "${worker_key_path}" >>"${spec_file}"
}
# Convert the worker spec TSV into a JSON var file holding `worker_nodes` and
# `worker_node_labels`.
# Globals (read, all optional):
#   LAB_INCLUDE_RASPBERRY_WORKER, LAB_RASPBERRY_HOST, LAB_RASPBERRY_USER,
#   LAB_RASPBERRY_NODE_NAME, LAB_RASPBERRY_SSH_KEY_PATH,
#   LAB_RASPBERRY_NODE_LABELS_JSON, LAB_PIMOX_WORKER_NODE_LABELS_JSON
# Arguments: $1 - spec file with lines "key<TAB>host<TAB>user<TAB>node<TAB>key path"
#            $2 - JSON file to write
# Returns:   non-zero when label JSON is invalid or a spec line is malformed
write_cluster_worker_var_file() {
  local spec_file="$1"
  local var_file="$2"
  # The JSON defaults live in variables because "${VAR:-{...}}" ends at the
  # FIRST closing brace: written inline, a user-supplied value would have a
  # stray "}" appended and be rejected as invalid JSON.
  local default_raspberry_labels='{"homelab.dev/node-role":"edge-app","homelab.dev/storage":"local"}'
  local default_pimox_labels='{"homelab.dev/node-role":"app","homelab.dev/storage":"nvme"}'

  LAB_INCLUDE_RASPBERRY_WORKER="${LAB_INCLUDE_RASPBERRY_WORKER:-true}" \
    LAB_RASPBERRY_HOST="${LAB_RASPBERRY_HOST:-192.168.100.89}" \
    LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \
    LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \
    LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \
    LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-${default_raspberry_labels}}" \
    LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-${default_pimox_labels}}" \
    python3 - "${spec_file}" "${var_file}" <<'PY'
import json
import os
import sys

spec_file, var_file = sys.argv[1:3]
nodes = {}
node_labels = {}
try:
    raspberry_labels = json.loads(os.environ["LAB_RASPBERRY_NODE_LABELS_JSON"])
    pimox_labels = json.loads(os.environ["LAB_PIMOX_WORKER_NODE_LABELS_JSON"])
except json.JSONDecodeError as exc:
    raise SystemExit(f"Invalid node label JSON: {exc}") from exc
# The Raspberry Pi worker is included unless explicitly disabled.
if os.environ["LAB_INCLUDE_RASPBERRY_WORKER"].lower() not in {"0", "false", "no", "off", "disabled"}:
    nodes["raspberrypi"] = {
        "host": os.environ["LAB_RASPBERRY_HOST"],
        "user": os.environ["LAB_RASPBERRY_USER"],
        "node_name": os.environ["LAB_RASPBERRY_NODE_NAME"],
        "ssh_key_path": os.environ["LAB_RASPBERRY_SSH_KEY_PATH"],
    }
    node_labels["raspberrypi"] = raspberry_labels
with open(spec_file, encoding="utf-8") as handle:
    for line_number, line in enumerate(handle, start=1):
        line = line.rstrip("\n")
        if not line:
            continue
        fields = line.split("\t")
        # Report malformed lines clearly instead of with a traceback.
        if len(fields) != 5:
            raise SystemExit(
                f"{spec_file}:{line_number}: expected 5 tab-separated fields, found {len(fields)}"
            )
        key, host, user, node_name, ssh_key_path = fields
        nodes[key] = {
            "host": host,
            "user": user,
            "node_name": node_name,
            "ssh_key_path": ssh_key_path,
        }
        node_labels[key] = pimox_labels
with open(var_file, "w", encoding="utf-8") as handle:
    json.dump({"worker_nodes": nodes, "worker_node_labels": node_labels}, handle, indent=2)
    handle.write("\n")
PY
}
# Provision the Pimox template and worker VMs, then generate the worker var
# file consumed by the cluster stack.
# Steps: resolve settings from LAB_* / TF_VAR_* variables, probe the Pimox host,
# export TF_VAR_* values, run the bootstrap/provisioning stack, ensure every
# non-skipped worker, write the var file, and export LAB_CLUSTER_VAR_FILE.
# Globals:
#   LAB_PIMOX_PIPELINE (read) - "auto" (default), a disabled value, or anything
#     else to require Pimox; "auto" skips quietly when the host is not ready
#   LAB_PIMOX_*, TF_VAR_pimox_*, LAB_PROVISIONING_INTERFACE (read)
#   TF_VAR_provisioning_interface, TF_VAR_pimox_*, LAB_CLUSTER_VAR_FILE (exported)
# Returns: 0 when disabled, skipped, or finished
# Exits:   1 on invalid settings, an unready host outside "auto" mode, a missing
#          provisioning interface, or a template that is not available
run_pimox_pipeline() {
local mode="${LAB_PIMOX_PIPELINE:-auto}"
local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}"
local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}"
local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}"
local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}"
local bridge="${LAB_PIMOX_BRIDGE:-${TF_VAR_pimox_template_bridge:-vmbr0}}"
local template_vmid="${LAB_PIMOX_TEMPLATE_VMID:-${TF_VAR_pimox_template_vmid:-9000}}"
local template_name="${LAB_PIMOX_TEMPLATE_NAME:-${TF_VAR_pimox_template_name:-debian13-arm64-k8s-template}}"
local template_replace_existing="${LAB_PIMOX_TEMPLATE_REPLACE_EXISTING:-${TF_VAR_pimox_template_replace_existing:-false}}"
local provisioning_interface
local worker_count="${LAB_PIMOX_WORKER_COUNT:-1}"
local worker_base_vmid="${LAB_PIMOX_WORKER_BASE_VMID:-9010}"
local worker_name_prefix="${LAB_PIMOX_WORKER_NAME_PREFIX:-pimox-worker}"
local worker_node_prefix="${LAB_PIMOX_WORKER_NODE_PREFIX:-pimox-worker}"
local worker_key_prefix="${LAB_PIMOX_WORKER_KEY_PREFIX:-pimox}"
# With the defaults (worker count 1, skip list "1") the loop below skips
# index 1, so no worker VM is ensured unless these values are overridden.
local worker_skip_indexes="${LAB_PIMOX_SKIP_WORKER_INDEXES:-1}"
local worker_cores="${LAB_PIMOX_WORKER_CORES:-2}"
local worker_memory="${LAB_PIMOX_WORKER_MEMORY:-2048}"
local worker_storage="${LAB_PIMOX_WORKER_STORAGE:-${TF_VAR_pimox_worker_storage:-nvme_thin_pool}}"
local worker_user="${LAB_PIMOX_WORKER_USER:-jv}"
local worker_key_path="${LAB_PIMOX_WORKER_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}"
local ip_prefix="${LAB_PIMOX_GUEST_IP_PREFIX:-192.168.100.}"
local timeout_seconds="${LAB_PIMOX_GUEST_TIMEOUT_SECONDS:-3600}"
local spec_file="${REPO_ROOT}/.lab/pimox-workers.tsv"
local var_file="${REPO_ROOT}/.lab/cluster-workers.auto.tfvars.json"
local index
local readiness_output
local readiness_status
if disabled_value "${mode}"; then
return 0
fi
# Setting LAB_PIMOX_WORKER_COUNT at all (even to an empty value) turns "auto"
# into an explicit request, so an unready host becomes an error.
if [[ "${mode}" == "auto" && -n "${LAB_PIMOX_WORKER_COUNT+x}" ]]; then
mode="true"
fi
if ! [[ "${worker_count}" =~ ^[0-9]+$ ]]; then
echo "LAB_PIMOX_WORKER_COUNT must be a non-negative integer." >&2
exit 1
fi
# The storage name is interpolated into remote shell commands later, so it is
# restricted to a conservative character set here.
if ! [[ "${worker_storage}" =~ ^[A-Za-z0-9_.:-]+$ ]]; then
echo "LAB_PIMOX_WORKER_STORAGE must be a valid Pimox storage identifier." >&2
exit 1
fi
if [[ "${worker_storage}" == "local" ]]; then
echo "LAB_PIMOX_WORKER_STORAGE cannot be local; only the Pimox template VM should live on local storage." >&2
exit 1
fi
# Readiness probe: errexit is switched off so a failing SSH command can be
# inspected through readiness_status instead of aborting the script.
set +e
readiness_output="$(pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu
if ! { command -v qm >/dev/null 2>&1 || [ -x '${qm_bin}' ]; }; then
echo 'qm was not found in PATH and ${qm_bin} is not executable'
exit 1
fi
if ! ip link show '${bridge}' >/dev/null 2>&1; then
echo 'bridge ${bridge} was not found'
exit 1
fi
if ! sudo -n true >/dev/null 2>&1; then
echo 'passwordless sudo is not available for ${pimox_user}'
exit 1
fi" 2>&1)"
readiness_status=$?
set -e
if ((readiness_status != 0)); then
# In "auto" mode an unready host is a reason to skip, not an error.
if [[ "${mode}" == "auto" ]]; then
echo "Skipping Pimox automation because ${pimox_user}@${pimox_host} with bridge ${bridge} is not ready."
return 0
fi
echo "Pimox automation requested, but ${pimox_user}@${pimox_host} is not ready: ${readiness_output}" >&2
exit 1
fi
ensure_python3
# Precedence: TF_VAR_provisioning_interface, then LAB_PROVISIONING_INTERFACE,
# then the interface detected from the route to the Pimox host.
provisioning_interface="${TF_VAR_provisioning_interface:-${LAB_PROVISIONING_INTERFACE:-$(detect_route_interface "${pimox_host}")}}"
if [[ -z "${provisioning_interface}" ]]; then
echo "Could not detect the Debian interface used to reach ${pimox_host}; set LAB_PROVISIONING_INTERFACE." >&2
exit 1
fi
# Export the resolved settings as TF_VAR_* so the stack run below sees them.
# The template-builder values keep any value that is already set.
export TF_VAR_provisioning_interface="${provisioning_interface}"
export TF_VAR_pimox_host="${pimox_host}"
export TF_VAR_pimox_user="${pimox_user}"
export TF_VAR_pimox_ssh_key_path="${pimox_key}"
export TF_VAR_pimox_qm_bin="${qm_bin}"
export TF_VAR_pimox_template_bridge="${bridge}"
export TF_VAR_pimox_template_vmid="${template_vmid}"
export TF_VAR_pimox_template_name="${template_name}"
export TF_VAR_pimox_template_replace_existing="${template_replace_existing}"
export TF_VAR_pimox_template_builder_enabled="${TF_VAR_pimox_template_builder_enabled:-true}"
export TF_VAR_pimox_template_build_ssh_key_path="${TF_VAR_pimox_template_build_ssh_key_path:-${worker_key_path}}"
export TF_VAR_pimox_template_build_user="${TF_VAR_pimox_template_build_user:-${worker_user}}"
export TF_VAR_pimox_template_guest_ip_prefix="${TF_VAR_pimox_template_guest_ip_prefix:-${ip_prefix}}"
export TF_VAR_pimox_template_build_timeout_seconds="${TF_VAR_pimox_template_build_timeout_seconds:-${timeout_seconds}}"
echo "Preparing Pimox provisioning and Debian worker template on ${pimox_host} without changing Orange Pi host networking..."
run_tofu_stack "bootstrap/provisioning"
# A worker count of zero means "template only": stop after provisioning.
if ((worker_count == 0)); then
return 0
fi
# Workers are cloned from the template, so it must exist and be a template.
if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${template_vmid}' | grep -q '^template: 1$'"; then
echo "Template VM ${template_vmid} is not available as a Pimox template after provisioning." >&2
exit 1
fi
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${template_vmid}' --agent enabled=1"
echo "Worker VM clones will be created on Pimox storage ${worker_storage}; template VM ${template_vmid} stays on its configured template storage."
mkdir -p "${REPO_ROOT}/.lab"
# Start from an empty spec file; each ensured worker appends one line.
: >"${spec_file}"
for ((index = 1; index <= worker_count; index++)); do
if worker_index_is_skipped "${index}" "${worker_skip_indexes}"; then
echo "Skipping Pimox worker index ${index} because LAB_PIMOX_SKIP_WORKER_INDEXES=${worker_skip_indexes}."
continue
fi
ensure_pimox_worker_node \
"${index}" \
"${spec_file}" \
"${pimox_host}" \
"${pimox_user}" \
"${pimox_key}" \
"${template_vmid}" \
"${bridge}" \
"${worker_base_vmid}" \
"${worker_name_prefix}" \
"${worker_node_prefix}" \
"${worker_key_prefix}" \
"${worker_cores}" \
"${worker_memory}" \
"${worker_user}" \
"${worker_key_path}" \
"${ip_prefix}" \
"${timeout_seconds}" \
"${qm_bin}" \
"${worker_storage}"
done
write_cluster_worker_var_file "${spec_file}" "${var_file}"
# run_tofu_stack passes this file to the bootstrap/cluster stack.
export LAB_CLUSTER_VAR_FILE="${var_file}"
}
# Create or reconcile the OpenWrt firewall VM on the Pimox host.
# Settings are resolved and validated locally, then a generated script is
# streamed to `bash -s` on the host. For an existing VM the script updates NICs,
# cores, memory and onboot. Otherwise it downloads the OpenWrt image, writes
# network/dhcp/firewall/system configuration (and an SSH key, when readable)
# into its root partition, and creates the VM from it.
# Globals (read):
#   LAB_OPENWRT_VM / LAB_OPENWRT_PIPELINE - must be truthy or disabled; default false
#   LAB_PIMOX_HOST, LAB_PIMOX_USER, LAB_PIMOX_SSH_KEY_PATH, LAB_PIMOX_QM_BIN and
#   their TF_VAR_pimox_* fallbacks
#   LAB_OPENWRT_VMID, LAB_OPENWRT_NAME, LAB_OPENWRT_STORAGE,
#   LAB_OPENWRT_WAN_BRIDGE, LAB_OPENWRT_LAN_BRIDGE, LAB_OPENWRT_CORES,
#   LAB_OPENWRT_MEMORY, LAB_OPENWRT_VERSION, LAB_OPENWRT_IMAGE_URL,
#   LAB_OPENWRT_LAN_IP, LAB_OPENWRT_LAN_NETMASK, LAB_OPENWRT_LAN_DHCP_ENABLED,
#   LAB_OPENWRT_START, LAB_OPENWRT_ROOT_SSH_PUBLIC_KEY_PATH
# Returns: 0 when disabled or when the remote script succeeds
# Exits:   1 on an invalid setting
run_openwrt_pipeline() {
  local mode="${LAB_OPENWRT_VM:-${LAB_OPENWRT_PIPELINE:-false}}"
  local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}"
  local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}"
  local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}"
  local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}"
  local vmid="${LAB_OPENWRT_VMID:-9050}"
  local vm_name="${LAB_OPENWRT_NAME:-openwrt-firewall}"
  local storage="${LAB_OPENWRT_STORAGE:-nvme_thin_pool}"
  local wan_bridge="${LAB_OPENWRT_WAN_BRIDGE:-vmbr0}"
  local lan_bridge="${LAB_OPENWRT_LAN_BRIDGE:-vmbr1}"
  local cores="${LAB_OPENWRT_CORES:-2}"
  local memory="${LAB_OPENWRT_MEMORY:-512}"
  local version="${LAB_OPENWRT_VERSION:-24.10.6}"
  local image_url="${LAB_OPENWRT_IMAGE_URL:-}"
  local lan_ip="${LAB_OPENWRT_LAN_IP:-192.168.50.1}"
  local lan_netmask="${LAB_OPENWRT_LAN_NETMASK:-255.255.255.0}"
  local lan_dhcp_enabled="${LAB_OPENWRT_LAN_DHCP_ENABLED:-false}"
  local start_vm="${LAB_OPENWRT_START:-true}"
  local root_key_path="${LAB_OPENWRT_ROOT_SSH_PUBLIC_KEY_PATH:-${pimox_key}.pub}"
  local root_key_b64=""
  local lan_dhcp_ignore="1"
  local start_vm_flag="false"
  # Loop helpers are declared local so they do not leak into the caller's scope.
  local value_name
  local value
  local env_name

  if disabled_value "${mode}"; then
    return 0
  fi
  if ! truthy "${mode}"; then
    echo "LAB_OPENWRT_VM must be true or false." >&2
    exit 1
  fi
  if [[ -z "${image_url}" ]]; then
    image_url="https://downloads.openwrt.org/releases/${version}/targets/armsr/armv8/openwrt-${version}-armsr-armv8-generic-ext4-combined-efi.img.gz"
  fi
  if ! [[ "${vmid}" =~ ^[0-9]+$ ]]; then
    echo "LAB_OPENWRT_VMID must be a numeric Pimox VMID." >&2
    exit 1
  fi
  # These values are interpolated into the remote script, so they are limited
  # to a conservative character set.
  for value_name in storage wan_bridge lan_bridge vm_name; do
    value="${!value_name}"
    env_name="LAB_OPENWRT_${value_name^^}"
    # vm_name is configured through LAB_OPENWRT_NAME, not LAB_OPENWRT_VM_NAME.
    if [[ "${value_name}" == "vm_name" ]]; then
      env_name="LAB_OPENWRT_NAME"
    fi
    if ! [[ "${value}" =~ ^[A-Za-z0-9_.:-]+$ ]]; then
      echo "${env_name} contains unsupported characters." >&2
      exit 1
    fi
  done
  # cores and memory are interpolated as well; accept only positive integers.
  if ! [[ "${cores}" =~ ^[1-9][0-9]*$ && "${memory}" =~ ^[1-9][0-9]*$ ]]; then
    echo "LAB_OPENWRT_CORES and LAB_OPENWRT_MEMORY must be positive integers." >&2
    exit 1
  fi
  if [[ "${storage}" == "local" ]]; then
    echo "LAB_OPENWRT_STORAGE cannot be local; reserve local storage for the Pimox Debian template." >&2
    exit 1
  fi
  if ! [[ "${lan_ip}" =~ ^[0-9.]+$ && "${lan_netmask}" =~ ^[0-9.]+$ ]]; then
    echo "LAB_OPENWRT_LAN_IP and LAB_OPENWRT_LAN_NETMASK must be IPv4-style values." >&2
    exit 1
  fi
  # OpenWrt's dhcp "ignore" option is the inverse of "DHCP enabled".
  if truthy "${lan_dhcp_enabled}"; then
    lan_dhcp_ignore="0"
  fi
  if truthy "${start_vm}"; then
    start_vm_flag="true"
  fi
  # The public key travels base64-encoded so its content cannot break the
  # quoting of the generated script; it stays empty when the file is unreadable.
  if [[ -r "${root_key_path}" ]]; then
    root_key_b64="$(base64 <"${root_key_path}" | tr -d '\n')"
  fi
  echo "Preparing OpenWrt firewall VM ${vmid} on ${pimox_host}; validating ${wan_bridge}, ${lan_bridge}, and ${storage} without changing Orange Pi networking..."
  # Unquoted heredoc: ${...} is expanded locally, while \$... is left for the
  # remote shell.
  pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "bash -s" <<EOF
set -euo pipefail
vmid="${vmid}"
vm_name="${vm_name}"
storage="${storage}"
wan_bridge="${wan_bridge}"
lan_bridge="${lan_bridge}"
cores="${cores}"
memory="${memory}"
image_url="${image_url}"
lan_ip="${lan_ip}"
lan_netmask="${lan_netmask}"
lan_dhcp_ignore="${lan_dhcp_ignore}"
start_vm="${start_vm_flag}"
root_key_b64="${root_key_b64}"
qm_cmd="${qm_bin}"
if [ ! -x "\$qm_cmd" ]; then
qm_cmd="\$(command -v qm 2>/dev/null || true)"
fi
if [ -z "\$qm_cmd" ]; then
echo "qm is not installed on this Pimox host" >&2
exit 1
fi
pvesm_cmd="\$(command -v pvesm 2>/dev/null || true)"
if [ -z "\$pvesm_cmd" ] && [ -x /usr/sbin/pvesm ]; then
pvesm_cmd=/usr/sbin/pvesm
fi
if [ -z "\$pvesm_cmd" ]; then
echo "pvesm was not found; cannot validate Pimox storage \$storage" >&2
exit 1
fi
if ! sudo -n true >/dev/null 2>&1; then
echo "passwordless sudo is required for OpenWrt VM automation" >&2
exit 1
fi
if ! ip link show "\$wan_bridge" >/dev/null 2>&1; then
echo "WAN bridge \$wan_bridge does not exist. Refusing to change Orange Pi networking." >&2
exit 1
fi
if ! ip link show "\$lan_bridge" >/dev/null 2>&1; then
echo "LAN bridge \$lan_bridge does not exist. Create it manually before enabling OpenWrt automation." >&2
exit 1
fi
if ! sudo "\$pvesm_cmd" status | awk -v storage="\$storage" 'NR > 1 && \$1 == storage { found = 1 } END { exit found ? 0 : 1 }'; then
echo "Pimox storage \$storage was not found." >&2
exit 1
fi
if sudo "\$qm_cmd" status "\$vmid" >/dev/null 2>&1; then
if sudo "\$qm_cmd" config "\$vmid" | grep -q '^template: 1$'; then
echo "VM \$vmid exists as a template; refusing to reuse it for OpenWrt." >&2
exit 1
fi
sudo "\$qm_cmd" set "\$vmid" \\
--net0 "virtio,bridge=\$wan_bridge" \\
--net1 "virtio,bridge=\$lan_bridge" \\
--cores "\$cores" \\
--memory "\$memory" \\
--onboot 1
if [ "\$start_vm" = "true" ] && sudo "\$qm_cmd" status "\$vmid" | grep -q 'status: stopped'; then
sudo "\$qm_cmd" start "\$vmid"
fi
exit 0
fi
for required_cmd in curl gzip losetup mount umount awk sed; do
if ! command -v "\$required_cmd" >/dev/null 2>&1; then
echo "\$required_cmd is required on the Pimox host for OpenWrt image preparation" >&2
exit 1
fi
done
tmp_dir="\$(mktemp -d /tmp/homelab-openwrt.XXXXXX)"
mnt_dir="\$tmp_dir/root"
loopdev=""
cleanup() {
if mountpoint -q "\$mnt_dir" 2>/dev/null; then
sudo umount "\$mnt_dir" || sudo umount -l "\$mnt_dir" || true
fi
if [ -n "\$loopdev" ]; then
sudo losetup -d "\$loopdev" >/dev/null 2>&1 || true
fi
rm -rf "\$tmp_dir"
}
trap cleanup EXIT
mkdir -p "\$mnt_dir"
curl -fsSL "\$image_url" -o "\$tmp_dir/openwrt.img.gz"
gzip -dc "\$tmp_dir/openwrt.img.gz" >"\$tmp_dir/openwrt.img"
loopdev="\$(sudo losetup --find --partscan --show "\$tmp_dir/openwrt.img")"
root_part="\${loopdev}p2"
if [ ! -b "\$root_part" ] && echo "\$loopdev" | grep -q 'loop[0-9]\$'; then
root_part="\${loopdev}p2"
fi
if [ ! -b "\$root_part" ]; then
echo "Could not find OpenWrt root partition \$root_part after attaching image." >&2
exit 1
fi
sudo mount "\$root_part" "\$mnt_dir"
sudo mkdir -p "\$mnt_dir/etc/config" "\$mnt_dir/etc/dropbear" "\$mnt_dir/root/.ssh"
cat >"\$tmp_dir/network" <<NETWORK
config interface 'loopback'
option device 'lo'
option proto 'static'
option ipaddr '127.0.0.1'
option netmask '255.0.0.0'
config globals 'globals'
option ula_prefix 'fd00:68:50::/48'
config interface 'wan'
option device 'eth0'
option proto 'dhcp'
config interface 'lan'
option device 'eth1'
option proto 'static'
option ipaddr '\$lan_ip'
option netmask '\$lan_netmask'
option ip6assign '60'
NETWORK
cat >"\$tmp_dir/dhcp" <<DHCP
config dnsmasq
option domainneeded '1'
option boguspriv '1'
option filterwin2k '0'
option localise_queries '1'
option rebind_protection '1'
option rebind_localhost '1'
option local '/lan/'
option domain 'lan'
option expandhosts '1'
option cachesize '1000'
option authoritative '1'
option readethers '1'
option leasefile '/tmp/dhcp.leases'
option resolvfile '/tmp/resolv.conf.d/resolv.conf.auto'
config dhcp 'lan'
option interface 'lan'
option start '100'
option limit '150'
option leasetime '12h'
option ignore '\$lan_dhcp_ignore'
config dhcp 'wan'
option interface 'wan'
option ignore '1'
DHCP
cat >"\$tmp_dir/firewall" <<'FIREWALL'
config defaults
option input 'REJECT'
option output 'ACCEPT'
option forward 'REJECT'
option synflood_protect '1'
config zone
option name 'lan'
list network 'lan'
option input 'ACCEPT'
option output 'ACCEPT'
option forward 'ACCEPT'
config zone
option name 'wan'
list network 'wan'
option input 'REJECT'
option output 'ACCEPT'
option forward 'REJECT'
option masq '1'
option mtu_fix '1'
config forwarding
option src 'lan'
option dest 'wan'
config rule
option name 'Allow-DHCP-Renew'
option src 'wan'
option proto 'udp'
option dest_port '68'
option target 'ACCEPT'
option family 'ipv4'
config rule
option name 'Allow-Ping'
option src 'wan'
option proto 'icmp'
option icmp_type 'echo-request'
option family 'ipv4'
option target 'ACCEPT'
FIREWALL
cat >"\$tmp_dir/system" <<SYSTEM
config system
option hostname '\$vm_name'
option timezone 'UTC'
option ttylogin '0'
option log_size '64'
option urandom_seed '0'
SYSTEM
sudo cp "\$tmp_dir/network" "\$mnt_dir/etc/config/network"
sudo cp "\$tmp_dir/dhcp" "\$mnt_dir/etc/config/dhcp"
sudo cp "\$tmp_dir/firewall" "\$mnt_dir/etc/config/firewall"
sudo cp "\$tmp_dir/system" "\$mnt_dir/etc/config/system"
if [ -n "\$root_key_b64" ]; then
printf '%s' "\$root_key_b64" | base64 -d >"\$tmp_dir/authorized_keys"
sudo cp "\$tmp_dir/authorized_keys" "\$mnt_dir/etc/dropbear/authorized_keys"
sudo cp "\$tmp_dir/authorized_keys" "\$mnt_dir/root/.ssh/authorized_keys"
sudo chmod 0600 "\$mnt_dir/etc/dropbear/authorized_keys" "\$mnt_dir/root/.ssh/authorized_keys"
fi
sync
sudo umount "\$mnt_dir"
sudo losetup -d "\$loopdev"
loopdev=""
sudo "\$qm_cmd" create "\$vmid" \\
--name "\$vm_name" \\
--bios ovmf \\
--cores "\$cores" \\
--memory "\$memory" \\
--net0 "virtio,bridge=\$wan_bridge" \\
--net1 "virtio,bridge=\$lan_bridge" \\
--numa 0 \\
--ostype l26 \\
--scsihw virtio-scsi-pci \\
--sockets 1 \\
--vga virtio \\
--onboot 1
sudo "\$qm_cmd" set "\$vmid" --efidisk0 "\$storage:1,efitype=4m,pre-enrolled-keys=0"
sudo "\$qm_cmd" importdisk "\$vmid" "\$tmp_dir/openwrt.img" "\$storage" --format raw >/dev/null
disk_volume="\$(sudo "\$qm_cmd" config "\$vmid" | awk -F': ' '/^unused[0-9]+:/ { print \$2; exit }')"
if [ -z "\$disk_volume" ]; then
echo "Could not find imported OpenWrt disk volume for VM \$vmid" >&2
exit 1
fi
sudo "\$qm_cmd" set "\$vmid" --scsi0 "\$disk_volume"
sudo "\$qm_cmd" set "\$vmid" --boot "order=scsi0"
if [ "\$start_vm" = "true" ]; then
sudo "\$qm_cmd" start "\$vmid"
fi
EOF
}
# Best-effort removal of leftover CNI network devices and namespaces: every
# cali* link, a fixed set of overlay/bridge links, and network namespaces whose
# names start with "cni-" or "calico". Failures are ignored.
cleanup_calico_links() {
  local link_name

  ip link show |
    awk -F: '/^[0-9]+: cali/ {print $2}' |
    cut -d@ -f1 |
    xargs -r -n1 sudo ip link delete 2>/dev/null || true

  for link_name in vxlan.calico tunl0 cni0 kube-ipvs0; do
    sudo ip link delete "${link_name}" 2>/dev/null || true
  done

  ip netns list |
    awk '/^(cni-|calico)/ {print $1}' |
    xargs -r -n1 sudo ip netns delete 2>/dev/null || true
}
# Best-effort flush (-F) and chain deletion (-X) for the default, nat, mangle
# and raw iptables tables, plus an IPVS table clear when ipvsadm is available.
cleanup_iptables() {
  local table

  # The default table is addressed without -t.
  sudo iptables -F || true
  sudo iptables -X || true
  for table in nat mangle raw; do
    sudo iptables -t "${table}" -F || true
    sudo iptables -t "${table}" -X || true
  done

  if command -v ipvsadm >/dev/null 2>&1; then
    sudo ipvsadm --clear || true
  fi
}
# Best-effort removal of the contents of the Calico runtime directories. Paths
# matching */cgroup* are pruned from the deletion, and each directory itself is
# removed only if rmdir succeeds (i.e. it ended up empty).
cleanup_calico_runtime_files() {
  local runtime_dir

  for runtime_dir in /run/calico /var/run/calico; do
    sudo test -e "${runtime_dir}" || continue
    sudo find "${runtime_dir}" -path '*/cgroup*' -prune -o -mindepth 1 -exec rm -rf -- {} + 2>/dev/null || true
    sudo rmdir "${runtime_dir}" 2>/dev/null || true
  done
}
# Undo the node's DNS customisation: delete the systemd-resolved drop-in, put
# the backed-up resolv.conf back when a backup exists, and restart
# systemd-resolved (ignoring failure).
restore_node_dns() {
  local resolv_backup="/etc/resolv.conf.homelab-k8s-backup"

  sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf
  if sudo test -e "${resolv_backup}"; then
    sudo rm -f /etc/resolv.conf
    sudo mv "${resolv_backup}" /etc/resolv.conf
  fi
  sudo systemctl restart systemd-resolved 2>/dev/null || true
}
# Best-effort unmount of everything mounted below the kubelet, containerd and
# Calico runtime directories. Each unmount is tried with -f first and falls
# back to a lazy unmount (-l); failures are ignored.
cleanup_mounts() {
  local mount_root
  # Declared local so the read loops do not overwrite a caller's variable.
  local mountpoint

  if command -v findmnt >/dev/null 2>&1; then
    # Reverse-sorted and de-duplicated so nested mounts are released before
    # their parents.
    while IFS= read -r mountpoint; do
      sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
    done < <(
      for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do
        findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true
      done | sort -ru
    )
  fi

  # Also try every directory two to five levels below the pods directory.
  # NOTE(review): this find runs without sudo — confirm the invoking user can
  # read /var/lib/kubelet/pods.
  while IFS= read -r mountpoint; do
    sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
  done < <(find /var/lib/kubelet/pods -mindepth 2 -maxdepth 5 -type d 2>/dev/null || true)

  sudo umount -f /var/lib/containerd/srun/* 2>/dev/null || sudo umount -l /var/lib/containerd/srun/* 2>/dev/null || true
}
# Reset this machine's Kubernetes node state: kubeadm reset, stop kubelet and
# containerd, unmount leftovers, delete state and configuration directories,
# clean up Calico, iptables, links and DNS, then start containerd again.
cleanup_node() {
  sudo kubeadm reset --force || true
  sudo systemctl stop kubelet 2>/dev/null || true
  sudo systemctl stop containerd 2>/dev/null || true
  sudo killall containerd-shim-runc-v2 2>/dev/null || true
  cleanup_mounts

  # Built only after the unmount step so the globs see the same directory
  # contents they would when written inline on the rm command.
  local -a stale_paths=(
    /etc/kubernetes/
    /var/lib/etcd/
    /var/lib/kubelet/
    /var/lib/cni/
    /etc/cni/net.d
    /run/flannel
    /var/lib/calico
    /var/log/calico
    /var/lib/containerd/*
    /run/containerd/*
    /etc/containerd/certs.d
    /etc/containerd/config.toml
  )
  sudo rm -rf "${stale_paths[@]}"

  cleanup_calico_runtime_files
  sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam
  cleanup_iptables
  cleanup_calico_links
  restore_node_dns

  # Recreate the registry config directory removed above before containerd
  # starts again.
  sudo mkdir -p /etc/containerd/certs.d
  sudo systemctl reset-failed kubelet containerd 2>/dev/null || true
  sudo systemctl start containerd 2>/dev/null || true
}
# Print the registry endpoint (everything before the first "/") of the first
# php-website image reference in apps/website/web-app.yaml.
# Globals: REPO_ROOT (read)
# Exits:   1 when no image reference containing a "/" is found
website_registry_endpoint() {
  local manifest="${REPO_ROOT}/apps/website/web-app.yaml"
  local image

  image="$(awk '$1 == "image:" && $2 ~ /php-website/ {print $2; exit}' "${manifest}")"
  case "${image}" in
    */*)
      printf '%s\n' "${image%%/*}"
      ;;
    *)
      echo "Could not determine website registry endpoint from apps/website/web-app.yaml" >&2
      exit 1
      ;;
  esac
}
# Print the registry endpoint (everything before the first "/") of the first
# demos-static image reference in apps/demos-static/web-app.yaml.
# Globals: REPO_ROOT (read)
# Exits:   1 when no image reference containing a "/" is found
demos_registry_endpoint() {
  local manifest="${REPO_ROOT}/apps/demos-static/web-app.yaml"
  local image

  image="$(awk '$1 == "image:" && $2 ~ /demos-static/ {print $2; exit}' "${manifest}")"
  case "${image}" in
    */*)
      printf '%s\n' "${image%%/*}"
      ;;
    *)
      echo "Could not determine demos registry endpoint from apps/demos-static/web-app.yaml" >&2
      exit 1
      ;;
  esac
}
# Print one SHA-256 digest covering every file under apps/website: files are
# hashed individually in sorted path order and the resulting listing is hashed
# again. Runs in a subshell, so the caller's working directory is unchanged.
# Globals: REPO_ROOT (read)
website_source_hash() {
  (
    cd "${REPO_ROOT}"
    find apps/website -type f -print0 |
      sort -z |
      xargs -0 sha256sum |
      sha256sum |
      cut -d ' ' -f 1
  )
}
# Print one SHA-256 digest covering every file under apps/demos-static: files
# are hashed individually in sorted path order and the resulting listing is
# hashed again. Runs in a subshell, so the caller's working directory is
# unchanged.
# Globals: REPO_ROOT (read)
demos_source_hash() {
  (
    cd "${REPO_ROOT}"
    find apps/demos-static -type f -print0 |
      sort -z |
      xargs -0 sha256sum |
      sha256sum |
      cut -d ' ' -f 1
  )
}
# Check whether a manifest exists in a registry's HTTP v2 API.
# Arguments: $1 - registry endpoint (host:port), $2 - repository, $3 - tag
# Returns:   curl's exit status (0 when the manifest request succeeds);
#            1 when curl is not installed
registry_image_exists() {
  local registry_endpoint="$1" repository="$2" tag="$3"
  local -a manifest_types=(
    application/vnd.oci.image.index.v1+json
    application/vnd.oci.image.manifest.v1+json
    application/vnd.docker.distribution.manifest.list.v2+json
    application/vnd.docker.distribution.manifest.v2+json
  )
  local accept_header

  command -v curl >/dev/null 2>&1 || return 1

  # Join the media types with ", " and drop the trailing separator.
  printf -v accept_header '%s, ' "${manifest_types[@]}"
  accept_header="${accept_header%, }"

  curl -fsS \
    -H "Accept: ${accept_header}" \
    "http://${registry_endpoint}/v2/${repository}/manifests/${tag}" >/dev/null
}
# Print the value stored for a key in a key=value state file.
# Only the first matching line is used, and the value is everything after the
# first "=" on it. Prints nothing (and still succeeds) when the file or key is
# missing.
# Arguments: $1 - state file, $2 - key
image_state_value() {
  local state_file="$1"
  local key="$2"

  awk -F= -v key="${key}" '
    $1 == key {
      sub(/^[^=]*=/, "")
      print
      exit
    }
  ' "${state_file}" 2>/dev/null || true
}
# Decide whether the website image can be reused as-is.
# It can when the state file records the same source hash, platforms and image
# reference, and php-website:latest is still present in the registry.
# Arguments: $1 - state file, $2 - current source hash, $3 - platforms,
#            $4 - image reference, $5 - registry endpoint
# Returns:   0 when current; 1 on a missing state file or any mismatch;
#            otherwise the status of the registry check
website_image_is_current() {
  local state_file="$1" source_hash="$2" platforms="$3"
  local image_ref="$4" registry_endpoint="$5"
  local saved_hash saved_platforms saved_image

  if [[ ! -f "${state_file}" ]]; then
    return 1
  fi

  saved_hash="$(image_state_value "${state_file}" source_hash)"
  saved_platforms="$(image_state_value "${state_file}" platforms)"
  saved_image="$(image_state_value "${state_file}" image)"

  if [[ "${saved_hash}" != "${source_hash}" ||
    "${saved_platforms}" != "${platforms}" ||
    "${saved_image}" != "${image_ref}" ]]; then
    return 1
  fi

  registry_image_exists "${registry_endpoint}" php-website latest
}
# Decide whether the demos image can be reused as-is.
# It can when the state file records the same source hash, platforms and image
# reference, and demos-static:latest is still present in the registry.
# Arguments: $1 - state file, $2 - current source hash, $3 - platforms,
#            $4 - image reference, $5 - registry endpoint
# Returns:   0 when current; 1 on a missing state file or any mismatch;
#            otherwise the status of the registry check
demos_image_is_current() {
  local state_file="$1" source_hash="$2" platforms="$3"
  local image_ref="$4" registry_endpoint="$5"
  local saved_hash saved_platforms saved_image

  if [[ ! -f "${state_file}" ]]; then
    return 1
  fi

  saved_hash="$(image_state_value "${state_file}" source_hash)"
  saved_platforms="$(image_state_value "${state_file}" platforms)"
  saved_image="$(image_state_value "${state_file}" image)"

  if [[ "${saved_hash}" != "${source_hash}" ||
    "${saved_platforms}" != "${platforms}" ||
    "${saved_image}" != "${image_ref}" ]]; then
    return 1
  fi

  registry_image_exists "${registry_endpoint}" demos-static latest
}
# Record the inputs of the last website image build as key=value lines
# (source_hash, platforms, image), replacing any previous state file. The
# parent directory is created when missing.
# Arguments: $1 - state file, $2 - source hash, $3 - platforms,
#            $4 - image reference
write_website_image_state() {
  local state_file="$1" source_hash="$2" platforms="$3" image_ref="$4"
  local state_dir

  state_dir="$(dirname "${state_file}")"
  mkdir -p "${state_dir}"
  printf 'source_hash=%s\nplatforms=%s\nimage=%s\n' \
    "${source_hash}" "${platforms}" "${image_ref}" >"${state_file}"
}
# Record the inputs of the last demos image build as key=value lines
# (source_hash, platforms, image), replacing any previous state file. The
# parent directory is created when missing.
# Arguments: $1 - state file, $2 - source hash, $3 - platforms,
#            $4 - image reference
write_demos_image_state() {
  local state_file="$1" source_hash="$2" platforms="$3" image_ref="$4"
  local state_dir

  state_dir="$(dirname "${state_file}")"
  mkdir -p "${state_dir}"
  printf 'source_hash=%s\nplatforms=%s\nimage=%s\n' \
    "${source_hash}" "${platforms}" "${image_ref}" >"${state_file}"
}
# Print the fourth column of the `df -Pm` data row for the filesystem holding a
# path. When the path does not exist yet, its nearest existing ancestor is
# measured instead.
# Arguments: $1 - path (may not exist)
path_available_mb() {
  local probe="$1"

  # Walk up until something exists; "/" ends the walk regardless.
  until [[ -e "${probe}" || "${probe}" == "/" ]]; do
    probe="$(dirname "${probe}")"
  done
  df -Pm "${probe}" | awk 'NR == 2 {print $4}'
}
# Print Docker's data root as reported by `docker info`, falling back to
# /var/lib/docker when that command fails.
docker_root_dir() {
  if ! docker info --format '{{.DockerRootDir}}' 2>/dev/null; then
    printf '/var/lib/docker\n'
  fi
}
# Best-effort reclamation of Docker disk space: remove the lab-builder buildx
# builder and its container, then prune the builder cache and all unused Docker
# data. Every step may fail without affecting the caller.
prune_unused_docker_build_data() {
  {
    docker buildx rm lab-builder || true
    docker rm -f buildx_buildkit_lab-builder0 || true
    docker builder prune -af || true
    docker system prune -af || true
  } 2>/dev/null
}
# Make sure Docker's data root has enough free space for a build, pruning
# unused Docker data once when it does not.
# Globals: DOCKER_BUILD_MIN_FREE_MB (read) - required free MiB, default 4096
# Exits:   1 when free space is still below the threshold after pruning
ensure_docker_build_space() {
  local min_free_mb="${DOCKER_BUILD_MIN_FREE_MB:-4096}"
  local docker_root
  local free_mb

  docker_root="$(docker_root_dir)"
  free_mb="$(path_available_mb "${docker_root}")"

  if ! ((free_mb >= min_free_mb)); then
    echo "Docker data root ${docker_root} has ${free_mb}MiB free; pruning unused Docker build data..."
    prune_unused_docker_build_data
    # Measure again to see whether pruning freed enough.
    free_mb="$(path_available_mb "${docker_root}")"
    if ((free_mb < min_free_mb)); then
      echo "Docker data root ${docker_root} still has only ${free_mb}MiB free after cleanup." >&2
      echo "Free space there or move Docker's data-root to a larger filesystem such as /home before building." >&2
      echo "Override the threshold with DOCKER_BUILD_MIN_FREE_MB if this host can build with less space." >&2
      exit 1
    fi
  fi
}
# Prepare a fresh "lab-builder" buildx builder for multi-architecture builds.
# Registers QEMU binfmt handlers, writes a buildx config that marks the given
# registry and the two local :30500 endpoints as plain-HTTP/insecure, recreates
# the builder with host networking, and bootstraps it.
# Globals:   BUILDX_CONFIG (read) - path of the config file to write
# Arguments: $1 - registry endpoint (host:port)
prepare_buildx_builder() {
  local registry_endpoint="$1"
  local endpoint

  docker run --rm --privileged multiarch/qemu-user-static --reset -p yes

  # One stanza per registry that must be reachable over plain HTTP.
  for endpoint in "${registry_endpoint}" "127.0.0.1:30500" "localhost:30500"; do
    printf '[registry."%s"]\nhttp = true\ninsecure = true\n' "${endpoint}"
  done >"${BUILDX_CONFIG}"

  # Drop any previous builder so the new configuration is picked up.
  docker buildx rm lab-builder 2>/dev/null || true
  docker buildx create --name lab-builder --driver docker-container --driver-opt network=host --config "${BUILDX_CONFIG}" --use
  docker buildx inspect --bootstrap
}
# Print diagnostics for an Argo CD application: its manifest and description,
# the pods in the argocd namespace, and the last 120 log lines of the repo
# server and the application controller. Each query is best-effort.
# Arguments:
#   $1 - Argo CD application name
# Globals:
#   KUBECONFIG (read)
dump_argocd_debug() {
  local app="$1"
  local -a argocd_kubectl=(kubectl --kubeconfig "${KUBECONFIG}" -n argocd)

  "${argocd_kubectl[@]}" get application "${app}" -o yaml || true
  "${argocd_kubectl[@]}" describe application "${app}" || true
  "${argocd_kubectl[@]}" get pods -o wide || true
  "${argocd_kubectl[@]}" logs deployment/argocd-repo-server --tail=120 || true
  "${argocd_kubectl[@]}" logs statefulset/argocd-application-controller --tail=120 || true
}
# Print diagnostics for a namespace: all resources, PVCs, pod descriptions and
# the last 80 events sorted by timestamp. Each query is best-effort.
# Arguments:
#   $1 - namespace to inspect
# Globals:
#   KUBECONFIG (read)
dump_namespace_debug() {
  local namespace="$1"
  local -a ns_kubectl=(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}")

  "${ns_kubectl[@]}" get all -o wide || true
  "${ns_kubectl[@]}" get pvc -o wide || true
  "${ns_kubectl[@]}" describe pods || true
  "${ns_kubectl[@]}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true
}
# Poll every 5 seconds until a namespace exists.
# On timeout, prints an error, dumps Argo CD diagnostics for the owning
# application and exits the script with status 1.
# Arguments:
#   $1 - namespace to wait for
#   $2 - Argo CD application expected to create it (used for diagnostics)
#   $3 - timeout in seconds
# Globals:
#   KUBECONFIG (read)
wait_for_namespace() {
  local namespace="$1"
  local app="$2"
  local timeout_seconds="$3"
  local waited=0

  while ! kubectl --kubeconfig "${KUBECONFIG}" get namespace "${namespace}" >/dev/null 2>&1; do
    if ((waited >= timeout_seconds)); then
      echo "Timed out waiting for namespace ${namespace} from Argo CD app ${app}" >&2
      dump_argocd_debug "${app}"
      exit 1
    fi
    sleep 5
    waited=$((waited + 5))
  done
}
# Poll every 5 seconds until a namespaced resource exists.
# On timeout, prints an error, dumps Argo CD diagnostics plus the last 80
# namespace events and exits the script with status 1.
# Arguments:
#   $1 - namespace
#   $2 - resource kind (e.g. deployment)
#   $3 - resource name
#   $4 - Argo CD application expected to create it (used for diagnostics)
#   $5 - timeout in seconds
# Globals:
#   KUBECONFIG (read)
wait_for_namespaced_resource() {
  local namespace="$1"
  local kind="$2"
  local name="$3"
  local app="$4"
  local timeout_seconds="$5"
  local waited=0

  while ! kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get "${kind}/${name}" >/dev/null 2>&1; do
    if ((waited >= timeout_seconds)); then
      echo "Timed out waiting for ${kind}/${name} in namespace ${namespace} from Argo CD app ${app}" >&2
      dump_argocd_debug "${app}"
      kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true
      exit 1
    fi
    sleep 5
    waited=$((waited + 5))
  done
}
# Poll every 5 seconds until a deployment reports enough ready replicas.
# The target is read once from .spec.replicas (1 when it cannot be read); a
# missing .status.readyReplicas counts as 0. On timeout, prints an error, dumps
# Argo CD and namespace diagnostics and exits the script with status 1.
# Arguments:
#   $1 - namespace
#   $2 - deployment name
#   $3 - Argo CD application that owns it (used for diagnostics)
#   $4 - timeout in seconds
# Globals:
#   KUBECONFIG (read)
wait_for_deployment_ready() {
  local namespace="$1"
  local deployment="$2"
  local app="$3"
  local timeout_seconds="$4"
  local -a get_deployment=(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}")
  local wanted
  local ready
  local waited=0

  wanted="$("${get_deployment[@]}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
  wanted="${wanted:-1}"

  while true; do
    # A failed query simply leaves "ready" empty, which is treated as zero.
    ready="$("${get_deployment[@]}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null || true)"
    if (( ${ready:-0} >= wanted )); then
      return 0
    fi
    if ((waited >= timeout_seconds)); then
      echo "Timed out waiting for deployment/${deployment} in namespace ${namespace} to have ${wanted} ready replicas" >&2
      dump_argocd_debug "${app}"
      dump_namespace_debug "${namespace}"
      exit 1
    fi
    sleep 5
    waited=$((waited + 5))
  done
}
# Apply the Gitea manifests directly with kubectl, in the order namespace,
# storage, service, deployment, then wait (up to 300 seconds per step) for the
# gitea-system namespace, the gitea deployment object and its ready replicas.
# Globals:
#   KUBECONFIG (read), REPO_ROOT (read)
apply_gitea_bootstrap_manifests() {
  local manifest

  for manifest in namespace storage service deployment; do
    kubectl --kubeconfig "${KUBECONFIG}" apply -f "${REPO_ROOT}/apps/gitea/${manifest}.yaml"
  done

  wait_for_namespace gitea-system gitea 300
  wait_for_namespaced_resource gitea-system deployment gitea gitea 300
  wait_for_deployment_ready gitea-system gitea gitea 300
}
# Install the host-side Gitea backup script together with a systemd service
# and a daily timer that runs it.
# Writes (via sudo) /usr/local/sbin/homelab-gitea-backup.sh and the
# homelab-gitea-backup.service / .timer units, reloads systemd and enables the
# timer immediately.
# Globals:
#   KUBECONFIG_PATH (read) - baked into the generated script as its default
#                            kubeconfig location
install_gitea_backup_timer() {
local backup_script="/usr/local/sbin/homelab-gitea-backup.sh"
# Unquoted heredoc delimiter: ${KUBECONFIG_PATH} is expanded now, at install
# time, while every backslash-escaped \$ is written literally and evaluated
# only when the generated script runs.
# The generated script exits 0 without doing anything when the kubeconfig is
# missing/empty or no Running pod matches the selector. Otherwise it runs
# `gitea dump` inside the pod, copies the archive to the host, installs it
# root-owned with mode 0640 under GITEA_BACKUP_DIR and deletes gitea-*.zip
# files older than GITEA_BACKUP_RETENTION_DAYS days.
sudo tee "${backup_script}" >/dev/null <<BACKUP_SCRIPT_EOT
#!/usr/bin/env bash
set -euo pipefail
KUBECONFIG_PATH="\${KUBECONFIG_PATH:-${KUBECONFIG_PATH}}"
GITEA_NAMESPACE="\${GITEA_NAMESPACE:-gitea-system}"
GITEA_SELECTOR="\${GITEA_SELECTOR:-app=gitea}"
GITEA_CONTAINER="\${GITEA_CONTAINER:-gitea}"
GITEA_BACKUP_DIR="\${GITEA_BACKUP_DIR:-/var/backups/homelab/gitea}"
GITEA_BACKUP_RETENTION_DAYS="\${GITEA_BACKUP_RETENTION_DAYS:-30}"
REMOTE_ARCHIVE="/tmp/homelab-gitea-dump.zip"
if [[ ! -s "\${KUBECONFIG_PATH}" ]]; then
echo "Skipping Gitea backup: kubeconfig \${KUBECONFIG_PATH} does not exist."
exit 0
fi
if ! command -v kubectl >/dev/null 2>&1; then
echo "kubectl is required for Gitea backups." >&2
exit 1
fi
pod="\$(kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" get pods \
-l "\${GITEA_SELECTOR}" \
--field-selector=status.phase=Running \
-o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
if [[ -z "\${pod}" ]]; then
echo "Skipping Gitea backup: no running Gitea pod found."
exit 0
fi
timestamp="\$(date -u +%Y%m%dT%H%M%SZ)"
tmp_archive="\$(mktemp "/tmp/gitea-\${timestamp}.XXXXXX.zip")"
backup_archive="\${GITEA_BACKUP_DIR}/gitea-\${timestamp}.zip"
cleanup() {
rm -f "\${tmp_archive}"
kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- rm -f "\${REMOTE_ARCHIVE}" >/dev/null 2>&1 || true
}
trap cleanup EXIT
kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- rm -f "\${REMOTE_ARCHIVE}" >/dev/null 2>&1 || true
kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- \
sh -c 'mkdir -p /data/git/repositories && chown git:git /data/git /data/git/repositories'
kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- \
su-exec git gitea dump -c /data/gitea/conf/app.ini --file "\${REMOTE_ARCHIVE}"
kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" cp -c "\${GITEA_CONTAINER}" \
"\${GITEA_NAMESPACE}/\${pod}:\${REMOTE_ARCHIVE}" "\${tmp_archive}"
sudo mkdir -p "\${GITEA_BACKUP_DIR}"
sudo install -m 0640 -o root -g root "\${tmp_archive}" "\${backup_archive}"
sudo find "\${GITEA_BACKUP_DIR}" -type f -name 'gitea-*.zip' -mtime +"\${GITEA_BACKUP_RETENTION_DAYS}" -delete
echo "Created \${backup_archive}"
BACKUP_SCRIPT_EOT
sudo chmod 0755 "${backup_script}"
# Quoted delimiter: the unit file is written verbatim, with no expansion.
# The oneshot service simply executes the backup script installed above.
sudo tee /etc/systemd/system/homelab-gitea-backup.service >/dev/null <<'SERVICE_EOT'
[Unit]
Description=Back up in-cluster Gitea to Debian host storage
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
ExecStart=/usr/local/sbin/homelab-gitea-backup.sh
SERVICE_EOT
# Timer unit, also written verbatim: scheduled for 02:35 every day with
# RandomizedDelaySec=20m.
sudo tee /etc/systemd/system/homelab-gitea-backup.timer >/dev/null <<'TIMER_EOT'
[Unit]
Description=Run daily Homelab Gitea backups
[Timer]
OnCalendar=*-*-* 02:35:00
RandomizedDelaySec=20m
Persistent=true
[Install]
WantedBy=timers.target
TIMER_EOT
# Pick up the new/changed units, then enable and start the timer (not the
# service itself); systemctl's stdout is discarded.
sudo systemctl daemon-reload
sudo systemctl enable --now homelab-gitea-backup.timer >/dev/null
}
# "backup-gitea" command: (re)install the backup script and timer, then run one
# backup immediately as root.
# Globals:
#   KUBECONFIG_PATH (read), KUBECONFIG (exported, set to KUBECONFIG_PATH)
backup_gitea() {
  require_debian_server "backup-gitea"

  KUBECONFIG="${KUBECONFIG_PATH}"
  export KUBECONFIG

  install_gitea_backup_timer
  sudo /usr/local/sbin/homelab-gitea-backup.sh
}
# "install-gitea-runner" command: install act_runner, register it with Gitea
# if no registration exists yet, and run it as a systemd service.
# Arguments:
#   $1 - registration token (optional; GITEA_RUNNER_REGISTRATION_TOKEN wins)
# Globals (all read, all optional):
#   GITEA_RUNNER_HOME, GITEA_RUNNER_INSTANCE_URL, GITEA_RUNNER_LABELS,
#   GITEA_RUNNER_NAME, GITEA_RUNNER_REGISTRATION_TOKEN, GITEA_RUNNER_USER,
#   GITEA_ACT_RUNNER_VERSION
# Exits with status 1 on an unsupported architecture, a failed download or
# install, or when a registration is needed but no token was supplied.
install_gitea_runner() {
  local runner_arch
  local deb_arch
  local download_tmp
  local package
  local runner_home="${GITEA_RUNNER_HOME:-/home/jv/.local/share/gitea-runner/my-homelab-configs}"
  local runner_instance="${GITEA_RUNNER_INSTANCE_URL:-https://lab2025.duckdns.org/git/}"
  local runner_labels="${GITEA_RUNNER_LABELS:-homelab-debian:host}"
  local runner_name="${GITEA_RUNNER_NAME:-homelab-debian-my-homelab-configs}"
  local runner_token="${GITEA_RUNNER_REGISTRATION_TOKEN:-${1:-}}"
  local runner_user="${GITEA_RUNNER_USER:-jv}"
  local runner_version="${GITEA_ACT_RUNNER_VERSION:-0.2.11}"
  local missing_packages=()

  require_debian_server "install-gitea-runner"

  # Map the Debian architecture onto the suffix used by act_runner releases.
  deb_arch="$(dpkg --print-architecture)"
  case "${deb_arch}" in
    amd64)
      runner_arch="linux-amd64"
      ;;
    arm64)
      runner_arch="linux-arm64"
      ;;
    *)
      echo "Unsupported Debian architecture: ${deb_arch}" >&2
      exit 1
      ;;
  esac

  # Install only the prerequisites that dpkg does not report as installed.
  for package in ca-certificates curl git nodejs python3; do
    if ! dpkg-query -W -f='${Status}' "${package}" 2>/dev/null | grep -q "install ok installed"; then
      missing_packages+=("${package}")
    fi
  done
  if [[ ${#missing_packages[@]} -gt 0 ]]; then
    sudo apt-get update
    sudo apt-get install -y --no-install-recommends "${missing_packages[@]}"
  fi

  # Download to a temporary file and install it in one step rather than letting
  # curl write /usr/local/bin/act_runner in place: a failed transfer would
  # leave a truncated binary there, and writing over an executable that the
  # runner service is currently running can fail.
  download_tmp="$(mktemp)"
  if ! curl -fsSL \
    -o "${download_tmp}" \
    "https://gitea.com/gitea/act_runner/releases/download/v${runner_version}/act_runner-${runner_version}-${runner_arch}"; then
    rm -f "${download_tmp}"
    echo "Failed to download act_runner v${runner_version} (${runner_arch})." >&2
    exit 1
  fi
  if ! sudo install -m 0755 -o root -g root "${download_tmp}" /usr/local/bin/act_runner; then
    rm -f "${download_tmp}"
    echo "Failed to install /usr/local/bin/act_runner." >&2
    exit 1
  fi
  rm -f "${download_tmp}"

  sudo -u "${runner_user}" mkdir -p "${runner_home}"
  if [[ ! -f "${runner_home}/.runner" ]]; then
    if [[ -z "${runner_token}" ]]; then
      echo "Set GITEA_RUNNER_REGISTRATION_TOKEN to the repository-level runner token from Gitea." >&2
      exit 1
    fi
    # Settings travel through the environment so the single-quoted command
    # string needs no interpolation of caller-supplied values.
    sudo -u "${runner_user}" env \
      HOME="/home/${runner_user}" \
      GITEA_RUNNER_HOME="${runner_home}" \
      GITEA_RUNNER_INSTANCE_URL="${runner_instance}" \
      GITEA_RUNNER_REGISTRATION_TOKEN="${runner_token}" \
      GITEA_RUNNER_NAME="${runner_name}" \
      GITEA_RUNNER_LABELS="${runner_labels}" \
      bash -lc 'cd "${GITEA_RUNNER_HOME}" && /usr/local/bin/act_runner register --no-interactive --instance "${GITEA_RUNNER_INSTANCE_URL}" --token "${GITEA_RUNNER_REGISTRATION_TOKEN}" --name "${GITEA_RUNNER_NAME}" --labels "${GITEA_RUNNER_LABELS}"'
  else
    echo "Existing runner registration found at ${runner_home}/.runner; keeping it."
  fi

  # Unquoted delimiter: runner_user and runner_home are expanded into the unit.
  sudo tee /etc/systemd/system/homelab-gitea-runner.service >/dev/null <<SERVICE_EOT
[Unit]
Description=Homelab Gitea Actions runner for my-homelab-configs
After=network-online.target docker.service
Wants=network-online.target
[Service]
Type=simple
User=${runner_user}
Group=${runner_user}
WorkingDirectory=${runner_home}
Environment=HOME=/home/${runner_user}
Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ExecStart=/usr/local/bin/act_runner daemon
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
SERVICE_EOT
  sudo systemctl daemon-reload
  sudo systemctl enable --now homelab-gitea-runner.service >/dev/null
  sudo systemctl status homelab-gitea-runner.service --no-pager -l
}
# Delete every pod matching a label selector and wait (up to 120s) for the
# deletion to finish. If the deletion fails, prints an error, dumps Argo CD
# and namespace diagnostics and exits the script with status 1.
# Arguments:
#   $1 - namespace
#   $2 - label selector (e.g. app=php-website)
#   $3 - Argo CD application name (used for diagnostics)
# Globals:
#   KUBECONFIG (read)
recreate_pods_for_selector() {
  local namespace="$1"
  local selector="$2"
  local app="$3"

  if kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" delete pod -l "${selector}" --ignore-not-found --wait=true --timeout=120s; then
    return 0
  fi

  echo "Failed to recreate pods matching ${selector} in namespace ${namespace}" >&2
  dump_argocd_debug "${app}"
  dump_namespace_debug "${namespace}"
  exit 1
}
# Request a hard refresh of an Argo CD application by merge-patching the
# argocd.argoproj.io/refresh annotation onto it. kubectl's stdout is discarded;
# its exit status is returned.
# Arguments:
#   $1 - Argo CD application name
# Globals:
#   KUBECONFIG (read)
refresh_argocd_application() {
  local app="$1"
  local hard_refresh_patch='{"metadata":{"annotations":{"argocd.argoproj.io/refresh":"hard"}}}'

  kubectl --kubeconfig "${KUBECONFIG}" patch application "${app}" -n argocd \
    --type merge -p "${hard_refresh_patch}" >/dev/null
}
# "apps" command: deploy the homelab applications.
# Sequence: apply the Gitea bootstrap manifests, apply the bootstrap/apps
# OpenTofu stack, refresh the Argo CD applications, wait for the in-cluster
# registry, build and push the website and demos images when they are not
# current, then roll each workload (pods are recreated only when its image was
# rebuilt) and record the new image state once its deployment is ready.
# Globals:
#   REPO_ROOT, KUBECONFIG_PATH (read)
#   WEBSITE_IMAGE_PLATFORMS, DEMOS_IMAGE_PLATFORMS (read; default linux/arm64)
#   TF_VAR_registry_endpoint, TF_VAR_kubeconfig_path (exported; defaulted from
#     the detected registry endpoint / KUBECONFIG_PATH when unset or empty)
#   KUBECONFIG (exported)
# Exits with status 1 when the registry endpoints disagree.
apps() {
  local registry_endpoint demos_registry_endpoint
  local website_image_ref website_image_state_file website_platforms website_source_hash
  local demos_image_ref demos_image_state_file demos_platforms demos_source_hash
  local website_image_built=false
  local demos_image_built=false
  local buildx_builder_ready=false
  local argocd_app
  local -a build_args

  require_debian_server "apps"

  registry_endpoint="$(website_registry_endpoint)"
  demos_registry_endpoint="$(demos_registry_endpoint)"

  demos_image_ref="${registry_endpoint}/demos-static:latest"
  demos_image_state_file="${REPO_ROOT}/.lab/demos-static-image.state"
  demos_platforms="${DEMOS_IMAGE_PLATFORMS:-linux/arm64}"
  demos_source_hash="$(demos_source_hash)"

  website_image_ref="${registry_endpoint}/php-website:latest"
  website_image_state_file="${REPO_ROOT}/.lab/php-website-image.state"
  website_platforms="${WEBSITE_IMAGE_PLATFORMS:-linux/arm64}"
  website_source_hash="$(website_source_hash)"

  # Fill in defaults for the OpenTofu inputs, then export them.
  : "${TF_VAR_registry_endpoint:=${registry_endpoint}}"
  : "${TF_VAR_kubeconfig_path:=${KUBECONFIG_PATH}}"
  export TF_VAR_registry_endpoint TF_VAR_kubeconfig_path
  KUBECONFIG="${TF_VAR_kubeconfig_path}"
  export KUBECONFIG

  # All three sources of the registry endpoint have to agree.
  if [[ "${TF_VAR_registry_endpoint}" != "${registry_endpoint}" ]]; then
    echo "TF_VAR_registry_endpoint must match apps/website/web-app.yaml (${registry_endpoint})" >&2
    exit 1
  fi
  if [[ "${demos_registry_endpoint}" != "${registry_endpoint}" ]]; then
    echo "apps/demos-static/web-app.yaml registry endpoint (${demos_registry_endpoint}) must match apps/website/web-app.yaml (${registry_endpoint})" >&2
    exit 1
  fi

  echo "Deploying homelab applications..."
  apply_gitea_bootstrap_manifests
  run_tofu_stack "bootstrap/apps"
  for argocd_app in container-registry demos-static gitea website-production; do
    refresh_argocd_application "${argocd_app}"
  done

  # The registry has to be up before any image can be pushed.
  wait_for_namespace container-registry container-registry 300
  wait_for_namespaced_resource container-registry deployment local-registry container-registry 300
  wait_for_deployment_ready container-registry local-registry container-registry 300

  # --- website image ---
  if ! website_image_is_current "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}" "${registry_endpoint}"; then
    echo "Building website image ${website_image_ref} for ${website_platforms} (${website_source_hash})..."
    ensure_docker_build_space
    if [[ "${buildx_builder_ready}" == "false" ]]; then
      prepare_buildx_builder "${registry_endpoint}"
      buildx_builder_ready=true
    fi
    build_args=(
      --network host
      --platform "${website_platforms}"
      --provenance=false
      --sbom=false
      --label "dev.homelab.website.source-hash=${website_source_hash}"
      -t "${website_image_ref}"
      -f "${REPO_ROOT}/apps/website/Dockerfile"
      "${REPO_ROOT}/apps/website/"
      --push
    )
    docker buildx build "${build_args[@]}"
    website_image_built=true
  else
    echo "Website image ${website_image_ref} is already current (${website_source_hash}); skipping build."
  fi

  # --- demos image ---
  if ! demos_image_is_current "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}" "${registry_endpoint}"; then
    echo "Building demos image ${demos_image_ref} for ${demos_platforms} (${demos_source_hash})..."
    ensure_docker_build_space
    # The builder is prepared at most once per run, whichever image needs it.
    if [[ "${buildx_builder_ready}" == "false" ]]; then
      prepare_buildx_builder "${registry_endpoint}"
      buildx_builder_ready=true
    fi
    build_args=(
      --network host
      --platform "${demos_platforms}"
      --provenance=false
      --sbom=false
      --label "dev.homelab.demos.source-hash=${demos_source_hash}"
      -t "${demos_image_ref}"
      -f "${REPO_ROOT}/apps/demos-static/Dockerfile"
      "${REPO_ROOT}/apps/demos-static/"
      --push
    )
    docker buildx build "${build_args[@]}"
    demos_image_built=true
  else
    echo "Demos image ${demos_image_ref} is already current (${demos_source_hash}); skipping build."
  fi

  # --- website rollout ---
  refresh_argocd_application website-production
  wait_for_namespace website-production website-production 300
  wait_for_namespaced_resource website-production deployment php-website-deployment website-production 300
  if [[ "${website_image_built}" == "false" ]]; then
    echo "Skipping website pod restart because the image did not change."
  else
    recreate_pods_for_selector website-production app=php-website website-production
  fi
  wait_for_deployment_ready website-production php-website-deployment website-production 300
  # State is written only after the rebuilt image is actually serving.
  if [[ "${website_image_built}" == "true" ]]; then
    write_website_image_state "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}"
  fi

  # --- demos rollout ---
  refresh_argocd_application demos-static
  wait_for_namespace demos-static demos-static 300
  wait_for_namespaced_resource demos-static deployment demos-static demos-static 300
  if [[ "${demos_image_built}" == "false" ]]; then
    echo "Skipping demos pod restart because the image did not change."
  else
    recreate_pods_for_selector demos-static app=demos-static demos-static
  fi
  wait_for_deployment_ready demos-static demos-static demos-static 300
  if [[ "${demos_image_built}" == "true" ]]; then
    write_demos_image_state "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}"
  fi

  echo "Application deployment successfully completed."
}
# "up" command: bring up the whole homelab.
# Runs the Pimox and OpenWrt pipelines, applies the cluster and platform
# OpenTofu stacks, installs the Gitea backup timer, deploys the applications
# and finally applies the edge stack.
up() {
  local stack

  require_debian_server "up"
  echo "Deploying the homelab infrastructure..."

  run_pimox_pipeline
  run_openwrt_pipeline
  for stack in bootstrap/cluster bootstrap/platform; do
    run_tofu_stack "${stack}"
  done
  install_gitea_backup_timer
  apps
  # The edge stack is applied last, after the applications are deployed.
  run_tofu_stack "bootstrap/edge"

  echo "Deployment successfully completed."
}
# "nuke" command: tear down Kubernetes on this host and on every worker, then
# delete the local OpenTofu state of all bootstrap stacks.
# Globals:
#   WORKER_SSH_TARGETS (read) - whitespace-separated SSH targets of the workers
#   KUBECONFIG_PATH (read)    - kubeconfig file that is removed
#   BUILDX_CONFIG (read)      - buildx config file that is removed
#   REPO_ROOT (read)          - repository root containing bootstrap/*
# Exits with status 1, before any OpenTofu state is deleted, if the remote
# cleanup of a worker fails.
nuke() {
local worker_ssh_targets
local worker_targets
local target
require_debian_server "nuke"
echo "Brutally nuking the homelab infrastructure..."
# "-" rather than ":-": the default worker applies only when the variable is
# unset, so an explicitly empty WORKER_SSH_TARGETS selects no workers at all.
worker_ssh_targets="${WORKER_SSH_TARGETS-jv@192.168.100.89}"
read -r -a worker_targets <<< "${worker_ssh_targets}"
echo "--> Terminating local OpenTofu tasks..."
killall tofu terraform 2>/dev/null || true
echo "--> Eviscerating local Kubernetes components..."
# cleanup_node is defined elsewhere in this file.
cleanup_node
sudo rm -f "${KUBECONFIG_PATH}"
for target in "${worker_targets[@]}"; do
echo "--> Eviscerating remote Kubernetes components (${target})..."
# The quoted heredoc is sent verbatim to `bash -s` on the worker; nothing in it
# is expanded locally. Remotely it resets kubeadm, stops kubelet/containerd,
# unmounts kubelet/containerd/calico mounts, removes Kubernetes, CNI, Calico
# and containerd state, flushes iptables, deletes CNI links and network
# namespaces, restores the node's DNS configuration and starts containerd
# again.
if ! ssh -o ConnectTimeout=5 "${target}" "bash -s" <<'EOF'
set -euo pipefail
cleanup_calico_links() {
ip link show | awk -F: '/^[0-9]+: cali/ {print $2}' | cut -d@ -f1 | xargs -r -n1 sudo ip link delete 2>/dev/null || true
sudo ip link delete vxlan.calico 2>/dev/null || true
sudo ip link delete tunl0 2>/dev/null || true
sudo ip link delete cni0 2>/dev/null || true
sudo ip link delete kube-ipvs0 2>/dev/null || true
ip netns list | awk '/^(cni-|calico)/ {print $1}' | xargs -r -n1 sudo ip netns delete 2>/dev/null || true
}
cleanup_iptables() {
sudo iptables -F || true
sudo iptables -X || true
sudo iptables -t nat -F || true
sudo iptables -t nat -X || true
sudo iptables -t mangle -F || true
sudo iptables -t mangle -X || true
sudo iptables -t raw -F || true
sudo iptables -t raw -X || true
if command -v ipvsadm >/dev/null 2>&1; then
sudo ipvsadm --clear || true
fi
}
cleanup_calico_runtime_files() {
local path
for path in /run/calico /var/run/calico; do
if sudo test -e "${path}"; then
sudo find "${path}" -path '*/cgroup*' -prune -o -mindepth 1 -exec rm -rf -- {} + 2>/dev/null || true
sudo rmdir "${path}" 2>/dev/null || true
fi
done
}
restore_node_dns() {
sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf
if sudo test -e /etc/resolv.conf.homelab-k8s-backup; then
sudo rm -f /etc/resolv.conf
sudo mv /etc/resolv.conf.homelab-k8s-backup /etc/resolv.conf
fi
sudo systemctl restart systemd-resolved 2>/dev/null || true
}
cleanup_mounts() {
if command -v findmnt >/dev/null 2>&1; then
local mount_root
while IFS= read -r mountpoint; do
sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
done < <(
for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do
findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true
done | sort -ru
)
fi
while IFS= read -r mountpoint; do
sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
done < <(find /var/lib/kubelet/pods -mindepth 2 -maxdepth 5 -type d 2>/dev/null || true)
sudo umount -f /var/lib/containerd/srun/* 2>/dev/null || sudo umount -l /var/lib/containerd/srun/* 2>/dev/null || true
}
sudo kubeadm reset --force || true
sudo systemctl stop kubelet 2>/dev/null || true
sudo systemctl stop containerd 2>/dev/null || true
sudo killall containerd-shim-runc-v2 2>/dev/null || true
cleanup_mounts
sudo rm -rf \
/etc/kubernetes/ \
/var/lib/etcd/ \
/var/lib/kubelet/ \
/var/lib/cni/ \
/etc/cni/net.d \
/run/flannel \
/var/lib/calico \
/var/log/calico \
/var/lib/containerd/* \
/run/containerd/* \
/etc/containerd/certs.d \
/etc/containerd/config.toml
cleanup_calico_runtime_files
sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam
cleanup_iptables
cleanup_calico_links
restore_node_dns
sudo mkdir -p /etc/containerd/certs.d
sudo systemctl reset-failed kubelet containerd 2>/dev/null || true
sudo systemctl start containerd 2>/dev/null || true
EOF
then
# Stop here so the OpenTofu state still describes the surviving node.
echo "Remote cleanup failed for ${target}; not deleting OpenTofu state." >&2
exit 1
fi
done
# Best-effort removal of the local buildx builder, its container and config.
docker buildx rm lab-builder 2>/dev/null || true
docker rm -f buildx_buildkit_lab-builder0 2>/dev/null || true
rm -f "${BUILDX_CONFIG}" || true
echo "--> Deleting OpenTofu tracking state files..."
# For each bootstrap stack: state files (and backups), the state lock info
# file, and the .terraform working directory.
rm -rf "${REPO_ROOT}"/bootstrap/cluster/terraform.tfstate*
rm -f "${REPO_ROOT}"/bootstrap/cluster/.terraform.tfstate.lock.info
rm -rf "${REPO_ROOT}"/bootstrap/cluster/.terraform/
rm -rf "${REPO_ROOT}"/bootstrap/platform/terraform.tfstate*
rm -f "${REPO_ROOT}"/bootstrap/platform/.terraform.tfstate.lock.info
rm -rf "${REPO_ROOT}"/bootstrap/platform/.terraform/
rm -rf "${REPO_ROOT}"/bootstrap/apps/terraform.tfstate*
rm -f "${REPO_ROOT}"/bootstrap/apps/.terraform.tfstate.lock.info
rm -rf "${REPO_ROOT}"/bootstrap/apps/.terraform/
rm -rf "${REPO_ROOT}"/bootstrap/edge/terraform.tfstate*
rm -f "${REPO_ROOT}"/bootstrap/edge/.terraform.tfstate.lock.info
rm -rf "${REPO_ROOT}"/bootstrap/edge/.terraform/
echo "Destruction complete. Retained data under /var/openebs/local was left intact."
}
# Command-line entry point: dispatch on the first argument.
# install-gitea-runner additionally accepts the registration token as the
# second argument. Any other (or missing) command is an error: the usage line
# is a diagnostic, so it goes to stderr like the script's other error messages,
# and the exit status is 1.
case "${1:-}" in
  up)
    up
    ;;
  apps)
    apps
    ;;
  backup-gitea)
    backup_gitea
    ;;
  install-gitea-runner)
    install_gitea_runner "${2:-}"
    ;;
  nuke)
    nuke
    ;;
  *)
    echo "Usage: $0 {up|apps|backup-gitea|install-gitea-runner|nuke}" >&2
    exit 1
    ;;
esac