#!/usr/bin/env bash set -euo pipefail REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" BUILDX_CONFIG="/tmp/buildx-config.toml" KUBECONFIG_PATH="${KUBECONFIG_PATH:-${TF_VAR_kubeconfig_path:-/home/jv/.kube/config}}" trap 'rm -f "${BUILDX_CONFIG}"' EXIT require_debian_server() { local command_name="$1" local os_id="" if [[ "$(uname -s)" != "Linux" ]]; then echo "Refusing to run '${command_name}' from this machine. Run it on the Debian homelab server." >&2 exit 1 fi if [[ -r /etc/os-release ]]; then os_id="$(awk -F= '$1 == "ID" {gsub(/"/, "", $2); print $2; exit}' /etc/os-release)" fi if [[ "${os_id}" != "debian" ]]; then echo "Refusing to run '${command_name}' on ${os_id:-unknown OS}. Run it on the Debian homelab server." >&2 exit 1 fi } tofu_state_has_resource() { local stack="$1" local resource_address="$2" tofu -chdir="${REPO_ROOT}/${stack}" state show "${resource_address}" >/dev/null 2>&1 } helm_release_secret_exists() { local namespace="$1" local release_name="$2" local secret_name secret_name="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get secrets \ -l "owner=helm,name=${release_name}" \ -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)" [[ -n "${secret_name}" ]] } kubernetes_resource_exists() { local namespace="$1" local resource_kind="$2" local resource_name="$3" if [[ -n "${namespace}" ]]; then kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get "${resource_kind}" "${resource_name}" >/dev/null 2>&1 return $? fi kubectl --kubeconfig "${KUBECONFIG_PATH}" get "${resource_kind}" "${resource_name}" >/dev/null 2>&1 } adopt_tofu_helm_release() { local stack="$1" local resource_address="$2" local namespace="$3" local release_name="$4" if tofu_state_has_resource "${stack}" "${resource_address}"; then return 0 fi if ! helm_release_secret_exists "${namespace}" "${release_name}"; then return 0 fi echo "Importing existing Helm release ${namespace}/${release_name} into ${stack} state (${resource_address})..." 
tofu -chdir="${REPO_ROOT}/${stack}" import -input=false "${resource_address}" "${namespace}/${release_name}" } adopt_tofu_kubernetes_resource() { local stack="$1" local resource_address="$2" local namespace="$3" local resource_kind="$4" local resource_name="$5" local import_id="$6" if tofu_state_has_resource "${stack}" "${resource_address}"; then return 0 fi if ! kubernetes_resource_exists "${namespace}" "${resource_kind}" "${resource_name}"; then return 0 fi echo "Importing existing Kubernetes ${resource_kind} ${resource_name} into ${stack} state (${resource_address})..." tofu -chdir="${REPO_ROOT}/${stack}" import -input=false "${resource_address}" "${import_id}" } adopt_tofu_kubernetes_manifest() { local stack="$1" local resource_address="$2" local namespace="$3" local kubectl_kind="$4" local api_version="$5" local manifest_kind="$6" local resource_name="$7" local import_id if tofu_state_has_resource "${stack}" "${resource_address}"; then return 0 fi if ! kubernetes_resource_exists "${namespace}" "${kubectl_kind}" "${resource_name}"; then return 0 fi import_id="apiVersion=${api_version},kind=${manifest_kind},namespace=${namespace},name=${resource_name}" echo "Importing existing Kubernetes ${manifest_kind} ${namespace}/${resource_name} into ${stack} state (${resource_address})..." 
tofu -chdir="${REPO_ROOT}/${stack}" import -input=false "${resource_address}" "${import_id}" } adopt_platform_existing_resources() { local stack="bootstrap/platform" adopt_tofu_helm_release "${stack}" "helm_release.calico_crds" "tigera-operator" "calico-crds" adopt_tofu_helm_release "${stack}" "helm_release.calico" "tigera-operator" "calico" adopt_tofu_helm_release "${stack}" "helm_release.openebs" "openebs" "openebs" adopt_tofu_helm_release "${stack}" "helm_release.argocd" "argocd" "argocd" adopt_tofu_helm_release "${stack}" "helm_release.kyverno" "kyverno" "kyverno" adopt_tofu_helm_release "${stack}" "helm_release.kyverno_policies" "kyverno" "kyverno-policies" adopt_tofu_helm_release "${stack}" "helm_release.loki" "monitoring" "loki" adopt_tofu_helm_release "${stack}" "helm_release.promtail" "monitoring" "promtail" adopt_tofu_helm_release "${stack}" "helm_release.prometheus_stack" "monitoring" "prometheus-stack" adopt_tofu_kubernetes_resource \ "${stack}" \ "kubernetes_storage_class_v1.openebs_hostpath_retain" \ "" \ "storageclass" \ "openebs-hostpath-retain" \ "openebs-hostpath-retain" adopt_tofu_kubernetes_resource \ "${stack}" \ "kubernetes_namespace_v1.monitoring" \ "" \ "namespace" \ "monitoring" \ "monitoring" } adopt_apps_existing_resources() { local stack="bootstrap/apps" local namespace="${TF_VAR_argocd_namespace:-argocd}" adopt_tofu_kubernetes_manifest \ "${stack}" \ 'kubernetes_manifest.argocd_application["container-registry"]' \ "${namespace}" \ "applications.argoproj.io" \ "argoproj.io/v1alpha1" \ "Application" \ "container-registry" adopt_tofu_kubernetes_manifest \ "${stack}" \ 'kubernetes_manifest.argocd_application["website-production"]' \ "${namespace}" \ "applications.argoproj.io" \ "argoproj.io/v1alpha1" \ "Application" \ "website-production" adopt_tofu_kubernetes_manifest \ "${stack}" \ 'kubernetes_manifest.argocd_application["demos-static"]' \ "${namespace}" \ "applications.argoproj.io" \ "argoproj.io/v1alpha1" \ "Application" \ 
"demos-static" } ensure_homelab_node_labels() { local control_plane_node="${LAB_CONTROL_PLANE_NODE_NAME:-debian}" local raspberry_node="${LAB_RASPBERRY_NODE_NAME:-raspberry}" local prometheus_selector="homelab.dev/node-role=app,homelab.dev/storage=nvme" local node local target_nodes echo "Applying homelab labels to existing Kubernetes nodes..." while IFS= read -r node; do [[ -n "${node}" ]] || continue if [[ "${node}" == "${control_plane_node}" ]]; then kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \ homelab.dev/node-role=control-plane \ homelab.dev/storage=local \ --overwrite continue fi kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \ node-role.kubernetes.io/worker=worker \ --overwrite if [[ "${node}" == pimox-worker-* ]]; then kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \ homelab.dev/node-role=app \ homelab.dev/storage=nvme \ --overwrite elif [[ "${node}" == "${raspberry_node}" ]]; then kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \ homelab.dev/node-role=edge-app \ homelab.dev/storage=local \ --overwrite fi done < <(kubectl --kubeconfig "${KUBECONFIG_PATH}" get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}') target_nodes="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get nodes -l "${prometheus_selector}" -o name)" if [[ -z "${target_nodes}" ]]; then echo "No nodes match ${prometheus_selector}; refusing to move prometheus-stack." 
>&2 exit 1 fi } delete_prometheus_stack_storage() { local namespace="${1:-monitoring}" local pattern='(^|-)prometheus-stack-(prometheus|alertmanager|grafana)(-|$)|^prometheus-prometheus-stack|^alertmanager-prometheus-stack|^storage-prometheus-stack-grafana' local pvc_names local pv_names pvc_names="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get pvc -o name 2>/dev/null | awk -F/ -v pattern="${pattern}" '$2 ~ pattern {print $2}')" pv_names="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get pv \ -o jsonpath='{range .items[?(@.spec.claimRef.namespace=="'"${namespace}"'")]}{.metadata.name}{"\t"}{.spec.claimRef.name}{"\n"}{end}' 2>/dev/null | awk -v pattern="${pattern}" '$2 ~ pattern {print $1}')" if [[ -n "${pvc_names}" ]]; then echo "Deleting old prometheus-stack PVCs in ${namespace}; saved Prometheus, Alertmanager, and Grafana data will be discarded..." printf '%s\n' "${pvc_names}" | xargs -r kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" delete pvc --wait=true --timeout=180s fi if [[ -n "${pv_names}" ]]; then echo "Deleting old prometheus-stack retained PV objects..." 
printf '%s\n' "${pv_names}" | xargs -r kubectl --kubeconfig "${KUBECONFIG_PATH}" delete pv --wait=false fi } run_tofu_stack() { local stack="$1" local -a apply_args=(-auto-approve) if [[ "${stack}" == "bootstrap/cluster" && -n "${LAB_CLUSTER_VAR_FILE:-}" ]]; then apply_args+=("-var-file=${LAB_CLUSTER_VAR_FILE}") fi tofu -chdir="${REPO_ROOT}/${stack}" init if [[ "${stack}" == "bootstrap/platform" ]]; then adopt_platform_existing_resources fi if [[ "${stack}" == "bootstrap/apps" ]]; then adopt_apps_existing_resources fi tofu -chdir="${REPO_ROOT}/${stack}" apply "${apply_args[@]}" } move_prometheus_stack_workers() { local stack="bootstrap/platform" local namespace="${LAB_MONITORING_NAMESPACE:-monitoring}" require_debian_server "move-prometheus-stack-workers" export TF_VAR_kubeconfig_path="${TF_VAR_kubeconfig_path:-${KUBECONFIG_PATH}}" export KUBECONFIG="${TF_VAR_kubeconfig_path}" echo "Moving prometheus-stack off the control plane. Existing prometheus-stack PVC data will be deleted." ensure_homelab_node_labels tofu -chdir="${REPO_ROOT}/${stack}" init adopt_platform_existing_resources tofu -chdir="${REPO_ROOT}/${stack}" destroy -target=helm_release.prometheus_stack -auto-approve delete_prometheus_stack_storage "${namespace}" tofu -chdir="${REPO_ROOT}/${stack}" apply -auto-approve kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get pods -o wide } truthy() { case "${1,,}" in 1 | true | yes | on) return 0 ;; *) return 1 ;; esac } disabled_value() { case "${1,,}" in 0 | false | no | off | disabled) return 0 ;; *) return 1 ;; esac } worker_index_is_skipped() { local index="$1" local skip_indexes="$2" local skip_index skip_indexes="${skip_indexes//,/ }" for skip_index in ${skip_indexes}; do [[ -z "${skip_index}" ]] && continue if ! [[ "${skip_index}" =~ ^[0-9]+$ ]]; then echo "LAB_PIMOX_SKIP_WORKER_INDEXES must contain only comma or space separated positive integers." 
>&2 exit 1 fi if ((skip_index == index)); then return 0 fi done return 1 } ensure_python3() { if command -v python3 >/dev/null 2>&1; then return 0 fi sudo apt-get update sudo apt-get install -y --no-install-recommends python3 } detect_route_interface() { local target="$1" ip route get "${target}" 2>/dev/null | awk ' { for (i = 1; i <= NF; i++) { if ($i == "dev") { print $(i + 1) exit } } } ' } pimox_ssh() { local host="$1" local user="$2" local key_path="$3" shift 3 ssh -i "${key_path}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new "${user}@${host}" "$@" } pimox_guest_ipv4() { local guest_json local host="$1" local user="$2" local key_path="$3" local vmid="$4" local ip_prefix="$5" local qm_bin="${6:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}" guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)" if [[ -z "${guest_json}" ]]; then return 1 fi GUEST_JSON="${guest_json}" python3 - "${ip_prefix}" <<'PY' import json import os import sys prefix = sys.argv[1] try: interfaces = json.loads(os.environ.get("GUEST_JSON", "")) except Exception: sys.exit(1) for iface in interfaces or []: for address in iface.get("ip-addresses") or []: if address.get("ip-address-type") != "ipv4": continue ip = address.get("ip-address", "") if not ip or ip.startswith(("127.", "169.254.")): continue if prefix and not ip.startswith(prefix): continue print(ip) sys.exit(0) sys.exit(1) PY } pimox_worker_vm_debug() { local host="$1" local user="$2" local key_path="$3" local vmid="$4" local qm_bin="$5" pimox_ssh "${host}" "${user}" "${key_path}" "set +e echo 'Pimox VM ${vmid} status:' sudo '${qm_bin}' status '${vmid}' echo 'Pimox VM ${vmid} config summary:' sudo '${qm_bin}' config '${vmid}' | grep -E '^(agent|boot|net0|scsi0|virtio0|sata0|ide0|ide2|efidisk0):' || true echo 'Pimox VM ${vmid} guest-agent network-get-interfaces:' sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" >&2 || true 
} wait_for_pimox_guest_ssh() { local host="$1" local user="$2" local key_path="$3" local vmid="$4" local guest_user="$5" local guest_key_path="$6" local ip_prefix="$7" local timeout_seconds="$8" local qm_bin="${9:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}" local deadline local elapsed local guest_ip local ip_filter_description local known_hosts_file="${REPO_ROOT}/.lab/pimox-worker-known_hosts" local last_guest_ip="" local last_known_hosts_ip="" local last_ssh_output="" local next_log local ssh_deadline=0 local ssh_output local ssh_timeout_seconds="${LAB_PIMOX_GUEST_SSH_TIMEOUT_SECONDS:-600}" ip_filter_description="matching prefix ${ip_prefix}" if [[ -z "${ip_prefix}" ]]; then ip_filter_description="that is not loopback or link-local" fi if ! [[ "${ssh_timeout_seconds}" =~ ^[0-9]+$ ]] || ((ssh_timeout_seconds == 0)); then echo "LAB_PIMOX_GUEST_SSH_TIMEOUT_SECONDS must be a positive integer." >&2 return 1 fi mkdir -p "$(dirname "${known_hosts_file}")" touch "${known_hosts_file}" chmod 0600 "${known_hosts_file}" deadline=$((SECONDS + timeout_seconds)) next_log="${SECONDS}" while ((SECONDS < deadline)); do guest_ip="$(pimox_guest_ipv4 "${host}" "${user}" "${key_path}" "${vmid}" "${ip_prefix}" "${qm_bin}" || true)" if [[ -n "${guest_ip}" ]]; then if ((ssh_deadline == 0)); then ssh_deadline=$((SECONDS + ssh_timeout_seconds)) elif ((SECONDS >= ssh_deadline)); then break fi last_guest_ip="${guest_ip}" if [[ "${last_known_hosts_ip}" != "${guest_ip}" ]]; then ssh-keygen -R "${guest_ip}" -f "${known_hosts_file}" >/dev/null 2>&1 || true last_known_hosts_ip="${guest_ip}" fi if ssh_output="$(ssh -i "${guest_key_path}" -o BatchMode=yes -o ConnectTimeout=8 -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile="${known_hosts_file}" "${guest_user}@${guest_ip}" true 2>&1)"; then printf '%s\n' "${guest_ip}" return 0 fi last_ssh_output="${ssh_output}" fi if ((SECONDS >= next_log)); then elapsed=$((timeout_seconds - (deadline - SECONDS))) if [[ -n "${last_guest_ip}" 
]]; then echo "Waiting for SSH to worker VM ${vmid} at ${last_guest_ip} as ${guest_user} (${elapsed}s elapsed)..." >&2 if [[ -n "${last_ssh_output}" ]]; then echo "Last SSH failure: ${last_ssh_output}" >&2 fi else echo "Waiting for worker VM ${vmid} to report an IPv4 address ${ip_filter_description} through qemu-guest-agent (${elapsed}s elapsed)..." >&2 fi next_log=$((SECONDS + 60)) fi sleep 10 done if [[ -n "${last_guest_ip}" ]]; then echo "Worker VM ${vmid} reported guest IP ${last_guest_ip}, but SSH as ${guest_user} never became reachable." >&2 if [[ -n "${last_ssh_output}" ]]; then echo "Last SSH failure: ${last_ssh_output}" >&2 fi else echo "Worker VM ${vmid} did not report an IPv4 address ${ip_filter_description} through qemu-guest-agent." >&2 fi pimox_worker_vm_debug "${host}" "${user}" "${key_path}" "${vmid}" "${qm_bin}" return 1 } pimox_generated_mac() { local vmid="$1" printf '02:68:10:%02x:%02x:%02x\n' \ $(((vmid >> 16) & 255)) \ $(((vmid >> 8) & 255)) \ $((vmid & 255)) } cpuset_cpu_count() { local cpuset="$1" local count=0 local part local start local end local -a parts IFS=',' read -r -a parts <<<"${cpuset}" for part in "${parts[@]}"; do if [[ "${part}" =~ ^([0-9]+)-([0-9]+)$ ]]; then start="${BASH_REMATCH[1]}" end="${BASH_REMATCH[2]}" if ((end < start)); then return 1 fi count=$((count + end - start + 1)) elif [[ "${part}" =~ ^[0-9]+$ ]]; then count=$((count + 1)) else return 1 fi done printf '%s\n' "${count}" } pimox_worker_cpu_affinity() { local index="$1" local affinities="$2" local worker_cores="$3" local affinity local affinity_index=1 local cpu_count for affinity in ${affinities}; do if ((affinity_index == index)); then if ! cpu_count="$(cpuset_cpu_count "${affinity}")"; then echo "Invalid Pimox worker CPU affinity '${affinity}'. Use CPU IDs or ranges, such as 4-5." >&2 exit 1 fi if ((cpu_count != worker_cores)); then echo "Pimox worker index ${index} uses ${worker_cores} cores but affinity '${affinity}' contains ${cpu_count} CPUs." 
>&2 exit 1 fi printf '%s\n' "${affinity}" return 0 fi affinity_index=$((affinity_index + 1)) done echo "No LAB_PIMOX_WORKER_CPU_AFFINITIES entry exists for Pimox worker index ${index}." >&2 exit 1 } ensure_pimox_worker_node() { local index="$1" local spec_file="$2" local pimox_host="$3" local pimox_user="$4" local pimox_key="$5" local template_vmid="$6" local bridge="$7" local worker_base_vmid="$8" local worker_name_prefix="$9" local worker_node_prefix="${10}" local worker_key_prefix="${11}" local worker_cores="${12}" local worker_memory="${13}" local worker_user="${14}" local worker_key_path="${15}" local ip_prefix="${16}" local timeout_seconds="${17}" local qm_bin="${18}" local worker_storage="${19}" local worker_replace_existing="${20}" local worker_cpu_affinity="${21}" local padded local vmid local worker_key local worker_name local node_name local mac local guest_ip printf -v padded '%02d' "${index}" vmid=$((worker_base_vmid + index - 1)) worker_key="${worker_key_prefix}${padded}" worker_name="${worker_name_prefix}-${padded}" node_name="${worker_node_prefix}-${padded}" mac="$(pimox_generated_mac "${vmid}")" if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' status '${vmid}' >/dev/null 2>&1"; then if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${vmid}' | grep -q '^template: 1$'"; then echo "VM ${vmid} exists as a template; refusing to reuse it as worker ${worker_name}." >&2 exit 1 fi if truthy "${worker_replace_existing}"; then echo "Replacing existing Pimox worker VM ${vmid} (${worker_name}) before cloning from template ${template_vmid}..." 
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu sudo '${qm_bin}' stop '${vmid}' >/dev/null 2>&1 || true elapsed=0 while [ \"\$elapsed\" -lt 300 ]; do if sudo '${qm_bin}' status '${vmid}' | grep -q 'status: stopped'; then break fi sleep 5 elapsed=\$((elapsed + 5)) done sudo '${qm_bin}' destroy '${vmid}' --purge 1 >/dev/null 2>&1 || sudo '${qm_bin}' destroy '${vmid}'" else pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu sudo '${qm_bin}' set '${vmid}' --agent enabled=1 --sockets 1 --cores '${worker_cores}' --memory '${worker_memory}' if [ -n '${worker_cpu_affinity}' ]; then affinity_output=\"\$(sudo '${qm_bin}' set '${vmid}' --affinity '${worker_cpu_affinity}' 2>&1)\" || { case \"\$affinity_output\" in *'Unknown option: affinity'*) echo 'Pimox qm does not support --affinity; skipping CPU affinity ${worker_cpu_affinity} for VM ${vmid}.' ;; *) printf '%s\n' \"\$affinity_output\" >&2 exit 1 ;; esac } fi if sudo '${qm_bin}' status '${vmid}' | grep -q 'status: stopped'; then sudo '${qm_bin}' start '${vmid}'; fi" fi fi if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' status '${vmid}' >/dev/null 2>&1"; then pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu if ! ip link show '${bridge}' >/dev/null 2>&1; then echo 'Pimox bridge ${bridge} does not exist. Refusing to change Orange Pi networking.' >&2 exit 1 fi pvesm_cmd=\"\$(command -v pvesm 2>/dev/null || true)\" if [ -z \"\$pvesm_cmd\" ] && [ -x /usr/sbin/pvesm ]; then pvesm_cmd=/usr/sbin/pvesm fi if [ -z \"\$pvesm_cmd\" ]; then echo 'pvesm was not found; cannot validate Pimox worker storage ${worker_storage}' >&2 exit 1 fi if ! sudo \"\$pvesm_cmd\" status | awk -v storage='${worker_storage}' 'NR > 1 && \$1 == storage { found = 1 } END { exit found ? 0 : 1 }'; then echo 'Pimox worker storage ${worker_storage} was not found. Refusing to create worker ${worker_name}.' 
>&2 exit 1 fi sudo '${qm_bin}' clone '${template_vmid}' '${vmid}' --name '${worker_name}' --full 1 --storage '${worker_storage}' sudo '${qm_bin}' set '${vmid}' --agent enabled=1 sudo '${qm_bin}' set '${vmid}' --sockets 1 --cores '${worker_cores}' --memory '${worker_memory}' if [ -n '${worker_cpu_affinity}' ]; then affinity_output=\"\$(sudo '${qm_bin}' set '${vmid}' --affinity '${worker_cpu_affinity}' 2>&1)\" || { case \"\$affinity_output\" in *'Unknown option: affinity'*) echo 'Pimox qm does not support --affinity; skipping CPU affinity ${worker_cpu_affinity} for VM ${vmid}.' ;; *) printf '%s\n' \"\$affinity_output\" >&2 exit 1 ;; esac } fi sudo '${qm_bin}' set '${vmid}' --net0 'virtio=${mac},bridge=${bridge}' sudo '${qm_bin}' set '${vmid}' --boot 'order=scsi0;net0' sudo '${qm_bin}' set '${vmid}' --onboot 1 sudo '${qm_bin}' start '${vmid}'" fi if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}" "${qm_bin}")"; then echo "Timed out waiting for worker VM ${vmid} (${worker_name}) to report a reachable guest IP." 
>&2 exit 1 fi printf '%s\t%s\t%s\t%s\t%s\n' "${worker_key}" "${guest_ip}" "${worker_user}" "${node_name}" "${worker_key_path}" >>"${spec_file}" } write_cluster_worker_var_file() { local spec_file="$1" local var_file="$2" LAB_INCLUDE_RASPBERRY_WORKER="${LAB_INCLUDE_RASPBERRY_WORKER:-false}" \ LAB_RASPBERRY_HOST="${LAB_RASPBERRY_HOST:-192.168.100.89}" \ LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \ LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \ LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \ LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\"}}" \ LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\"}}" \ python3 - "${spec_file}" "${var_file}" <<'PY' import json import os import sys spec_file, var_file = sys.argv[1:3] nodes = {} node_labels = {} try: raspberry_labels = json.loads(os.environ["LAB_RASPBERRY_NODE_LABELS_JSON"]) pimox_labels = json.loads(os.environ["LAB_PIMOX_WORKER_NODE_LABELS_JSON"]) except json.JSONDecodeError as exc: raise SystemExit(f"Invalid node label JSON: {exc}") from exc if os.environ["LAB_INCLUDE_RASPBERRY_WORKER"].lower() not in {"0", "false", "no", "off", "disabled"}: nodes["raspberrypi"] = { "host": os.environ["LAB_RASPBERRY_HOST"], "user": os.environ["LAB_RASPBERRY_USER"], "node_name": os.environ["LAB_RASPBERRY_NODE_NAME"], "ssh_key_path": os.environ["LAB_RASPBERRY_SSH_KEY_PATH"], } node_labels["raspberrypi"] = raspberry_labels with open(spec_file, encoding="utf-8") as handle: for line in handle: line = line.rstrip("\n") if not line: continue key, host, user, node_name, ssh_key_path = line.split("\t") nodes[key] = { "host": host, "user": user, "node_name": node_name, "ssh_key_path": ssh_key_path, } node_labels[key] = 
pimox_labels with open(var_file, "w", encoding="utf-8") as handle: json.dump({"worker_nodes": nodes, "worker_node_labels": node_labels}, handle, indent=2) handle.write("\n") PY } prepare_cluster_worker_var_file() { local include_raspberry_default="$1" local spec_file="${REPO_ROOT}/.lab/manual-workers.tsv" local var_file="${REPO_ROOT}/.lab/cluster-workers.auto.tfvars.json" export LAB_INCLUDE_RASPBERRY_WORKER="${LAB_INCLUDE_RASPBERRY_WORKER:-${include_raspberry_default}}" mkdir -p "${REPO_ROOT}/.lab" : >"${spec_file}" write_cluster_worker_var_file "${spec_file}" "${var_file}" export LAB_CLUSTER_VAR_FILE="${var_file}" } run_pimox_pipeline() { local mode="${LAB_PIMOX_PIPELINE:-true}" local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}" local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}" local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}" local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}" local bridge="${LAB_PIMOX_BRIDGE:-${TF_VAR_pimox_template_bridge:-vmbr0}}" local template_vmid="${LAB_PIMOX_TEMPLATE_VMID:-${TF_VAR_pimox_template_vmid:-9000}}" local template_name="${LAB_PIMOX_TEMPLATE_NAME:-${TF_VAR_pimox_template_name:-debian13-arm64-k8s-template}}" local template_cores="${LAB_PIMOX_TEMPLATE_CORES:-${TF_VAR_pimox_template_cores:-2}}" local template_memory="${LAB_PIMOX_TEMPLATE_MEMORY:-${TF_VAR_pimox_template_memory:-4096}}" local template_replace_existing="${LAB_PIMOX_TEMPLATE_REPLACE_EXISTING:-${TF_VAR_pimox_template_replace_existing:-false}}" local provisioning_interface local worker_count="${LAB_PIMOX_WORKER_COUNT:-1}" local worker_base_vmid="${LAB_PIMOX_WORKER_BASE_VMID:-9010}" local worker_name_prefix="${LAB_PIMOX_WORKER_NAME_PREFIX:-pimox-worker}" local worker_node_prefix="${LAB_PIMOX_WORKER_NODE_PREFIX:-pimox-worker}" local worker_key_prefix="${LAB_PIMOX_WORKER_KEY_PREFIX:-pimox}" local worker_skip_indexes="${LAB_PIMOX_SKIP_WORKER_INDEXES:-}" local 
worker_cores="${LAB_PIMOX_WORKER_CORES:-2}" local worker_memory="${LAB_PIMOX_WORKER_MEMORY:-4096}" local worker_cpu_affinities="${LAB_PIMOX_WORKER_CPU_AFFINITIES:-}" local worker_replace_existing="${LAB_PIMOX_WORKER_REPLACE_EXISTING:-false}" local worker_storage="${LAB_PIMOX_WORKER_STORAGE:-${TF_VAR_pimox_worker_storage:-nvme_thin_pool}}" local worker_user="${LAB_PIMOX_WORKER_USER:-jv}" local worker_key_path="${LAB_PIMOX_WORKER_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" local ip_prefix="${LAB_PIMOX_GUEST_IP_PREFIX:-192.168.100.}" local timeout_seconds="${LAB_PIMOX_GUEST_TIMEOUT_SECONDS:-3600}" local spec_file="${REPO_ROOT}/.lab/pimox-workers.tsv" local var_file="${REPO_ROOT}/.lab/cluster-workers.auto.tfvars.json" local index local readiness_output local readiness_status local worker_cpu_affinity if disabled_value "${mode}"; then return 0 fi if [[ "${mode}" == "auto" && -n "${LAB_PIMOX_WORKER_COUNT+x}" ]]; then mode="true" fi if ! [[ "${worker_count}" =~ ^[0-9]+$ ]]; then echo "LAB_PIMOX_WORKER_COUNT must be a non-negative integer." >&2 exit 1 fi if ! [[ "${template_cores}" =~ ^[0-9]+$ && "${worker_cores}" =~ ^[0-9]+$ ]]; then echo "LAB_PIMOX_TEMPLATE_CORES and LAB_PIMOX_WORKER_CORES must be positive integers." >&2 exit 1 fi if ! [[ "${template_memory}" =~ ^[0-9]+$ && "${worker_memory}" =~ ^[0-9]+$ ]]; then echo "LAB_PIMOX_TEMPLATE_MEMORY and LAB_PIMOX_WORKER_MEMORY must be positive integer MiB values." >&2 exit 1 fi if ((template_cores == 0 || worker_cores == 0 || template_memory == 0 || worker_memory == 0)); then echo "Pimox template and worker CPU and memory values must be greater than zero." >&2 exit 1 fi if ! truthy "${worker_replace_existing}" && ! disabled_value "${worker_replace_existing}"; then echo "LAB_PIMOX_WORKER_REPLACE_EXISTING must be true or false." >&2 exit 1 fi if ! [[ "${worker_storage}" =~ ^[A-Za-z0-9_.:-]+$ ]]; then echo "LAB_PIMOX_WORKER_STORAGE must be a valid Pimox storage identifier." 
>&2 exit 1 fi if [[ "${worker_storage}" == "local" ]]; then echo "LAB_PIMOX_WORKER_STORAGE cannot be local; only the Pimox template VM should live on local storage." >&2 exit 1 fi set +e readiness_output="$(pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu if ! { command -v qm >/dev/null 2>&1 || [ -x '${qm_bin}' ]; }; then echo 'qm was not found in PATH and ${qm_bin} is not executable' exit 1 fi if ! ip link show '${bridge}' >/dev/null 2>&1; then echo 'bridge ${bridge} was not found' exit 1 fi if ! sudo -n true >/dev/null 2>&1; then echo 'passwordless sudo is not available for ${pimox_user}' exit 1 fi" 2>&1)" readiness_status=$? set -e if ((readiness_status != 0)); then if [[ "${mode}" == "auto" ]]; then echo "Skipping Pimox automation because ${pimox_user}@${pimox_host} with bridge ${bridge} is not ready." return 0 fi echo "Pimox automation requested, but ${pimox_user}@${pimox_host} is not ready: ${readiness_output}" >&2 exit 1 fi ensure_python3 provisioning_interface="${TF_VAR_provisioning_interface:-${LAB_PROVISIONING_INTERFACE:-$(detect_route_interface "${pimox_host}")}}" if [[ -z "${provisioning_interface}" ]]; then echo "Could not detect the Debian interface used to reach ${pimox_host}; set LAB_PROVISIONING_INTERFACE." 
>&2 exit 1 fi export TF_VAR_provisioning_interface="${provisioning_interface}" export TF_VAR_pimox_host="${pimox_host}" export TF_VAR_pimox_user="${pimox_user}" export TF_VAR_pimox_ssh_key_path="${pimox_key}" export TF_VAR_pimox_qm_bin="${qm_bin}" export TF_VAR_pimox_template_bridge="${bridge}" export TF_VAR_pimox_template_vmid="${template_vmid}" export TF_VAR_pimox_template_name="${template_name}" export TF_VAR_pimox_template_cores="${template_cores}" export TF_VAR_pimox_template_memory="${template_memory}" export TF_VAR_pimox_template_replace_existing="${template_replace_existing}" export TF_VAR_pimox_template_builder_enabled="${TF_VAR_pimox_template_builder_enabled:-true}" export TF_VAR_pimox_template_build_ssh_key_path="${TF_VAR_pimox_template_build_ssh_key_path:-${worker_key_path}}" export TF_VAR_pimox_template_build_user="${TF_VAR_pimox_template_build_user:-${worker_user}}" export TF_VAR_pimox_template_guest_ip_prefix="${TF_VAR_pimox_template_guest_ip_prefix:-${ip_prefix}}" export TF_VAR_pimox_template_build_timeout_seconds="${TF_VAR_pimox_template_build_timeout_seconds:-${timeout_seconds}}" echo "Preparing Pimox provisioning and Debian worker template on ${pimox_host} without changing Orange Pi host networking..." run_tofu_stack "bootstrap/provisioning" if ((worker_count == 0)); then return 0 fi if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${template_vmid}' | grep -q '^template: 1$'"; then echo "Template VM ${template_vmid} is not available as a Pimox template after provisioning." >&2 exit 1 fi pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${template_vmid}' --agent enabled=1" echo "Worker VM clones will be created on Pimox storage ${worker_storage}; template VM ${template_vmid} stays on its configured template storage." 
mkdir -p "${REPO_ROOT}/.lab" : >"${spec_file}" for ((index = 1; index <= worker_count; index++)); do if worker_index_is_skipped "${index}" "${worker_skip_indexes}"; then echo "Skipping Pimox worker index ${index} because LAB_PIMOX_SKIP_WORKER_INDEXES=${worker_skip_indexes}." continue fi worker_cpu_affinity="" if [[ -n "${worker_cpu_affinities}" ]]; then worker_cpu_affinity="$(pimox_worker_cpu_affinity "${index}" "${worker_cpu_affinities}" "${worker_cores}")" fi ensure_pimox_worker_node \ "${index}" \ "${spec_file}" \ "${pimox_host}" \ "${pimox_user}" \ "${pimox_key}" \ "${template_vmid}" \ "${bridge}" \ "${worker_base_vmid}" \ "${worker_name_prefix}" \ "${worker_node_prefix}" \ "${worker_key_prefix}" \ "${worker_cores}" \ "${worker_memory}" \ "${worker_user}" \ "${worker_key_path}" \ "${ip_prefix}" \ "${timeout_seconds}" \ "${qm_bin}" \ "${worker_storage}" \ "${worker_replace_existing}" \ "${worker_cpu_affinity}" done write_cluster_worker_var_file "${spec_file}" "${var_file}" export LAB_CLUSTER_VAR_FILE="${var_file}" } run_openwrt_pipeline() { local mode="${LAB_OPENWRT_VM:-${LAB_OPENWRT_PIPELINE:-false}}" local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}" local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}" local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}" local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}" local vmid="${LAB_OPENWRT_VMID:-9100}" local vm_name="${LAB_OPENWRT_NAME:-openwrt-firewall}" local storage="${LAB_OPENWRT_STORAGE:-nvme_thin_pool}" local wan_bridge="${LAB_OPENWRT_WAN_BRIDGE:-vmbr0}" local lan_bridge="${LAB_OPENWRT_LAN_BRIDGE:-vmbr1}" local cores="${LAB_OPENWRT_CORES:-2}" local memory="${LAB_OPENWRT_MEMORY:-512}" local version="${LAB_OPENWRT_VERSION:-24.10.6}" local image_url="${LAB_OPENWRT_IMAGE_URL:-}" local lan_ip="${LAB_OPENWRT_LAN_IP:-192.168.50.1}" local lan_netmask="${LAB_OPENWRT_LAN_NETMASK:-255.255.255.0}" local 
lan_dhcp_enabled="${LAB_OPENWRT_LAN_DHCP_ENABLED:-false}" local start_vm="${LAB_OPENWRT_START:-false}" local onboot="${LAB_OPENWRT_ONBOOT:-false}" local root_key_path="${LAB_OPENWRT_ROOT_SSH_PUBLIC_KEY_PATH:-${pimox_key}.pub}" local root_key_b64="" local lan_dhcp_ignore="1" local start_vm_flag="false" local onboot_flag="0" if disabled_value "${mode}"; then return 0 fi if ! truthy "${mode}"; then echo "LAB_OPENWRT_VM must be true or false." >&2 exit 1 fi if [[ -z "${image_url}" ]]; then image_url="https://downloads.openwrt.org/releases/${version}/targets/armsr/armv8/openwrt-${version}-armsr-armv8-generic-ext4-combined-efi.img.gz" fi if ! [[ "${vmid}" =~ ^[0-9]+$ ]]; then echo "LAB_OPENWRT_VMID must be a numeric Pimox VMID." >&2 exit 1 fi for value_name in storage wan_bridge lan_bridge vm_name; do local value="${!value_name}" if ! [[ "${value}" =~ ^[A-Za-z0-9_.:-]+$ ]]; then echo "LAB_OPENWRT_${value_name^^} contains unsupported characters." >&2 exit 1 fi done if [[ "${storage}" == "local" ]]; then echo "LAB_OPENWRT_STORAGE cannot be local; reserve local storage for the Pimox Debian template." >&2 exit 1 fi if ! [[ "${lan_ip}" =~ ^[0-9.]+$ && "${lan_netmask}" =~ ^[0-9.]+$ ]]; then echo "LAB_OPENWRT_LAN_IP and LAB_OPENWRT_LAN_NETMASK must be IPv4-style values." >&2 exit 1 fi if truthy "${lan_dhcp_enabled}"; then lan_dhcp_ignore="0" fi if ! truthy "${start_vm}" && ! disabled_value "${start_vm}"; then echo "LAB_OPENWRT_START must be true or false." >&2 exit 1 fi if truthy "${start_vm}"; then start_vm_flag="true" fi if ! truthy "${onboot}" && ! disabled_value "${onboot}"; then echo "LAB_OPENWRT_ONBOOT must be true or false." >&2 exit 1 fi if truthy "${onboot}"; then onboot_flag="1" fi if [[ -r "${root_key_path}" ]]; then root_key_b64="$(base64 <"${root_key_path}" | tr -d '\n')" fi echo "Preparing OpenWrt firewall VM ${vmid} on ${pimox_host}; validating ${wan_bridge}, ${lan_bridge}, and ${storage} without changing Orange Pi networking..." 
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "bash -s" <&2 exit 1 fi pvesm_cmd="\$(command -v pvesm 2>/dev/null || true)" if [ -z "\$pvesm_cmd" ] && [ -x /usr/sbin/pvesm ]; then pvesm_cmd=/usr/sbin/pvesm fi if [ -z "\$pvesm_cmd" ]; then echo "pvesm was not found; cannot validate Pimox storage \$storage" >&2 exit 1 fi if ! sudo -n true >/dev/null 2>&1; then echo "passwordless sudo is required for OpenWrt VM automation" >&2 exit 1 fi if ! ip link show "\$wan_bridge" >/dev/null 2>&1; then echo "WAN bridge \$wan_bridge does not exist. Refusing to change Orange Pi networking." >&2 exit 1 fi if ! ip link show "\$lan_bridge" >/dev/null 2>&1; then echo "LAN bridge \$lan_bridge does not exist. Create it manually before enabling OpenWrt automation." >&2 exit 1 fi if ! sudo "\$pvesm_cmd" status | awk -v storage="\$storage" 'NR > 1 && \$1 == storage { found = 1 } END { exit found ? 0 : 1 }'; then echo "Pimox storage \$storage was not found." >&2 exit 1 fi if sudo "\$qm_cmd" status "\$vmid" >/dev/null 2>&1; then if sudo "\$qm_cmd" config "\$vmid" | grep -q '^template: 1$'; then echo "VM \$vmid exists as a template; refusing to reuse it for OpenWrt." >&2 exit 1 fi sudo "\$qm_cmd" set "\$vmid" \\ --net0 "virtio,bridge=\$wan_bridge" \\ --net1 "virtio,bridge=\$lan_bridge" \\ --cores "\$cores" \\ --memory "\$memory" \\ --onboot "\$onboot" if [ "\$start_vm" = "true" ] && sudo "\$qm_cmd" status "\$vmid" | grep -q 'status: stopped'; then sudo "\$qm_cmd" start "\$vmid" fi exit 0 fi for required_cmd in curl gzip losetup mount umount awk sed; do if ! 
command -v "\$required_cmd" >/dev/null 2>&1; then echo "\$required_cmd is required on the Pimox host for OpenWrt image preparation" >&2 exit 1 fi done tmp_dir="\$(mktemp -d /tmp/homelab-openwrt.XXXXXX)" mnt_dir="\$tmp_dir/root" loopdev="" cleanup() { if mountpoint -q "\$mnt_dir" 2>/dev/null; then sudo umount "\$mnt_dir" || sudo umount -l "\$mnt_dir" || true fi if [ -n "\$loopdev" ]; then sudo losetup -d "\$loopdev" >/dev/null 2>&1 || true fi rm -rf "\$tmp_dir" } trap cleanup EXIT mkdir -p "\$mnt_dir" curl -fsSL "\$image_url" -o "\$tmp_dir/openwrt.img.gz" gzip -dc "\$tmp_dir/openwrt.img.gz" >"\$tmp_dir/openwrt.img" loopdev="\$(sudo losetup --find --partscan --show "\$tmp_dir/openwrt.img")" root_part="\${loopdev}p2" if [ ! -b "\$root_part" ] && echo "\$loopdev" | grep -q 'loop[0-9]\$'; then root_part="\${loopdev}p2" fi if [ ! -b "\$root_part" ]; then echo "Could not find OpenWrt root partition \$root_part after attaching image." >&2 exit 1 fi sudo mount "\$root_part" "\$mnt_dir" sudo mkdir -p "\$mnt_dir/etc/config" "\$mnt_dir/etc/dropbear" "\$mnt_dir/root/.ssh" cat >"\$tmp_dir/network" <"\$tmp_dir/dhcp" <"\$tmp_dir/firewall" <<'FIREWALL' config defaults option input 'REJECT' option output 'ACCEPT' option forward 'REJECT' option synflood_protect '1' config zone option name 'lan' list network 'lan' option input 'ACCEPT' option output 'ACCEPT' option forward 'ACCEPT' config zone option name 'wan' list network 'wan' option input 'REJECT' option output 'ACCEPT' option forward 'REJECT' option masq '1' option mtu_fix '1' config forwarding option src 'lan' option dest 'wan' config rule option name 'Allow-DHCP-Renew' option src 'wan' option proto 'udp' option dest_port '68' option target 'ACCEPT' option family 'ipv4' config rule option name 'Allow-Ping' option src 'wan' option proto 'icmp' option icmp_type 'echo-request' option family 'ipv4' option target 'ACCEPT' FIREWALL cat >"\$tmp_dir/system" <"\$tmp_dir/authorized_keys" sudo cp "\$tmp_dir/authorized_keys" 
"\$mnt_dir/etc/dropbear/authorized_keys" sudo cp "\$tmp_dir/authorized_keys" "\$mnt_dir/root/.ssh/authorized_keys" sudo chmod 0600 "\$mnt_dir/etc/dropbear/authorized_keys" "\$mnt_dir/root/.ssh/authorized_keys" fi sync sudo umount "\$mnt_dir" sudo losetup -d "\$loopdev" loopdev="" sudo "\$qm_cmd" create "\$vmid" \\ --name "\$vm_name" \\ --bios ovmf \\ --cores "\$cores" \\ --memory "\$memory" \\ --net0 "virtio,bridge=\$wan_bridge" \\ --net1 "virtio,bridge=\$lan_bridge" \\ --numa 0 \\ --ostype l26 \\ --scsihw virtio-scsi-pci \\ --sockets 1 \\ --vga virtio \\ --onboot "\$onboot" sudo "\$qm_cmd" set "\$vmid" --efidisk0 "\$storage:1,efitype=4m,pre-enrolled-keys=0" sudo "\$qm_cmd" importdisk "\$vmid" "\$tmp_dir/openwrt.img" "\$storage" --format raw >/dev/null disk_volume="\$(sudo "\$qm_cmd" config "\$vmid" | awk -F': ' '/^unused[0-9]+:/ { print \$2; exit }')" if [ -z "\$disk_volume" ]; then echo "Could not find imported OpenWrt disk volume for VM \$vmid" >&2 exit 1 fi sudo "\$qm_cmd" set "\$vmid" --scsi0 "\$disk_volume" sudo "\$qm_cmd" set "\$vmid" --boot "order=scsi0" if [ "\$start_vm" = "true" ]; then sudo "\$qm_cmd" start "\$vmid" fi EOF } openwrt() { require_debian_server "openwrt" LAB_OPENWRT_VM=true run_openwrt_pipeline } cleanup_calico_links() { ip link show | awk -F: '/^[0-9]+: cali/ {print $2}' | cut -d@ -f1 | xargs -r -n1 sudo ip link delete 2>/dev/null || true sudo ip link delete vxlan.calico 2>/dev/null || true sudo ip link delete tunl0 2>/dev/null || true sudo ip link delete cni0 2>/dev/null || true sudo ip link delete kube-ipvs0 2>/dev/null || true ip netns list | awk '/^(cni-|calico)/ {print $1}' | xargs -r -n1 sudo ip netns delete 2>/dev/null || true } cleanup_iptables() { sudo iptables -F || true sudo iptables -X || true sudo iptables -t nat -F || true sudo iptables -t nat -X || true sudo iptables -t mangle -F || true sudo iptables -t mangle -X || true sudo iptables -t raw -F || true sudo iptables -t raw -X || true if command -v ipvsadm >/dev/null 2>&1; 
then
    sudo ipvsadm --clear || true
  fi
}

# Empty the Calico runtime directories and remove them when possible.
# Paths matching */cgroup* are pruned from the find walk. Best effort:
# every failure is ignored.
cleanup_calico_runtime_files() {
  local path

  for path in /run/calico /var/run/calico; do
    if sudo test -e "${path}"; then
      sudo find "${path}" -path '*/cgroup*' -prune -o -mindepth 1 -exec rm -rf -- {} + 2>/dev/null || true
      # rmdir only succeeds once the directory is empty.
      sudo rmdir "${path}" 2>/dev/null || true
    fi
  done
}

# Remove the homelab-k8s systemd-resolved drop-in, put back the saved
# /etc/resolv.conf when a backup exists, and restart systemd-resolved
# (restart failures are ignored).
restore_node_dns() {
  sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf
  if sudo test -e /etc/resolv.conf.homelab-k8s-backup; then
    sudo rm -f /etc/resolv.conf
    sudo mv /etc/resolv.conf.homelab-k8s-backup /etc/resolv.conf
  fi
  sudo systemctl restart systemd-resolved 2>/dev/null || true
}

# Unmount everything mounted below the kubelet, containerd and Calico
# directories. Each target gets a forced unmount first and a lazy unmount
# as fallback; all failures are ignored.
cleanup_mounts() {
  local mount_root
  # Loop variable for both read loops; declared local so it does not leak
  # into the caller's scope.
  local mountpoint

  if command -v findmnt >/dev/null 2>&1; then
    while IFS= read -r mountpoint; do
      sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
    done < <(
      for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do
        findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true
      done | sort -ru # reverse order puts nested targets before their parents
    )
  fi

  # Also try every directory under the kubelet pod tree, whether or not
  # findmnt reported it.
  while IFS= read -r mountpoint; do
    sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
  done < <(find /var/lib/kubelet/pods -mindepth 2 -maxdepth 5 -type d 2>/dev/null || true)

  sudo umount -f /var/lib/containerd/srun/* 2>/dev/null || sudo umount -l /var/lib/containerd/srun/* 2>/dev/null || true
}

# Reset this node: kubeadm reset, stop kubelet/containerd, unmount and
# delete Kubernetes, CNI, Calico and containerd state, then clean up
# iptables, Calico links and node DNS.
cleanup_node() {
  sudo kubeadm reset --force || true
  sudo systemctl stop kubelet 2>/dev/null || true
  sudo systemctl stop containerd 2>/dev/null || true
  sudo killall containerd-shim-runc-v2 2>/dev/null || true
  cleanup_mounts
  sudo rm -rf \
    /etc/kubernetes/ \
    /var/lib/etcd/ \
    /var/lib/kubelet/ \
    /var/lib/cni/ \
    /etc/cni/net.d \
    /run/flannel \
    /var/lib/calico \
    /var/log/calico \
    /var/lib/containerd/* \
    /run/containerd/* \
    /etc/containerd/certs.d \
    /etc/containerd/config.toml
  cleanup_calico_runtime_files
  sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam
  cleanup_iptables
  cleanup_calico_links
  restore_node_dns
  sudo mkdir -p \
/etc/containerd/certs.d sudo systemctl reset-failed kubelet containerd 2>/dev/null || true sudo systemctl start containerd 2>/dev/null || true } website_registry_endpoint() { local image image="$(awk '$1 == "image:" && $2 ~ /php-website/ {print $2; exit}' "${REPO_ROOT}/apps/website/web-app.yaml")" if [[ -z "${image}" || "${image}" != */* ]]; then echo "Could not determine website registry endpoint from apps/website/web-app.yaml" >&2 exit 1 fi printf '%s\n' "${image%%/*}" } demos_registry_endpoint() { local image image="$(awk '$1 == "image:" && $2 ~ /demos-static/ {print $2; exit}' "${REPO_ROOT}/apps/demos-static/web-app.yaml")" if [[ -z "${image}" || "${image}" != */* ]]; then echo "Could not determine demos registry endpoint from apps/demos-static/web-app.yaml" >&2 exit 1 fi printf '%s\n' "${image%%/*}" } website_source_hash() { ( cd "${REPO_ROOT}" find apps/website -type f -print0 | sort -z | xargs -0 sha256sum | sha256sum | awk '{print $1}' ) } demos_source_hash() { ( cd "${REPO_ROOT}" find apps/demos-static -type f -print0 | sort -z | xargs -0 sha256sum | sha256sum | awk '{print $1}' ) } registry_image_exists() { local registry_endpoint="$1" local repository="$2" local tag="$3" local accept_header if ! 
command -v curl >/dev/null 2>&1; then
    return 1
  fi

  accept_header="application/vnd.oci.image.index.v1+json, application/vnd.oci.image.manifest.v1+json, application/vnd.docker.distribution.manifest.list.v2+json, application/vnd.docker.distribution.manifest.v2+json"
  curl -fsS \
    -H "Accept: ${accept_header}" \
    "http://${registry_endpoint}/v2/${repository}/manifests/${tag}" >/dev/null
}

# Print the value stored for a key in a key=value state file.
# Arguments: $1 - state file, $2 - key
# Prints everything after the first "=" of the first matching line.
# Prints nothing, and still succeeds, when the file is unreadable or the
# key is absent.
image_state_value() {
  local state_file="$1"
  local wanted="$2"

  awk -F= -v wanted="${wanted}" '$1 == wanted {print substr($0, index($0, "=") + 1); exit}' "${state_file}" 2>/dev/null || true
}

# Succeed only when the state file records the same source hash, platforms
# and image reference as the arguments AND the registry still serves
# php-website:latest.
# Arguments: $1 state file, $2 source hash, $3 platforms, $4 image ref,
#            $5 registry endpoint
website_image_is_current() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"
  local registry_endpoint="$5"
  local recorded_hash
  local recorded_platforms
  local recorded_image

  if [[ ! -f "${state_file}" ]]; then
    return 1
  fi

  recorded_hash="$(image_state_value "${state_file}" source_hash)"
  recorded_platforms="$(image_state_value "${state_file}" platforms)"
  recorded_image="$(image_state_value "${state_file}" image)"

  if [[ "${recorded_hash}" != "${source_hash}" ]]; then
    return 1
  fi
  if [[ "${recorded_platforms}" != "${platforms}" ]]; then
    return 1
  fi
  if [[ "${recorded_image}" != "${image_ref}" ]]; then
    return 1
  fi

  # The recorded state matches; the registry has the final word.
  registry_image_exists "${registry_endpoint}" php-website latest
}

# Succeed only when the state file records the same source hash, platforms
# and image reference as the arguments AND the registry still serves
# demos-static:latest.
# Arguments: $1 state file, $2 source hash, $3 platforms, $4 image ref,
#            $5 registry endpoint
demos_image_is_current() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"
  local registry_endpoint="$5"
  local recorded_hash
  local recorded_platforms
  local recorded_image

  if [[ ! -f "${state_file}" ]]; then
    return 1
  fi

  recorded_hash="$(image_state_value "${state_file}" source_hash)"
  recorded_platforms="$(image_state_value "${state_file}" platforms)"
  recorded_image="$(image_state_value "${state_file}" image)"

  if [[ "${recorded_hash}" != "${source_hash}" ]]; then
    return 1
  fi
  if [[ "${recorded_platforms}" != "${platforms}" ]]; then
    return 1
  fi
  if [[ "${recorded_image}" != "${image_ref}" ]]; then
    return 1
  fi

  registry_image_exists "${registry_endpoint}" demos-static latest
}

write_website_image_state() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"

  mkdir -p \
"$(dirname "${state_file}")" { printf 'source_hash=%s\n' "${source_hash}" printf 'platforms=%s\n' "${platforms}" printf 'image=%s\n' "${image_ref}" } > "${state_file}" } write_demos_image_state() { local state_file="$1" local source_hash="$2" local platforms="$3" local image_ref="$4" mkdir -p "$(dirname "${state_file}")" { printf 'source_hash=%s\n' "${source_hash}" printf 'platforms=%s\n' "${platforms}" printf 'image=%s\n' "${image_ref}" } > "${state_file}" } path_available_mb() { local path="$1" while [[ ! -e "${path}" && "${path}" != "/" ]]; do path="$(dirname "${path}")" done df -Pm "${path}" | awk 'NR == 2 {print $4}' } docker_root_dir() { docker info --format '{{.DockerRootDir}}' 2>/dev/null || printf '/var/lib/docker\n' } prune_unused_docker_build_data() { docker buildx rm lab-builder 2>/dev/null || true docker rm -f buildx_buildkit_lab-builder0 2>/dev/null || true docker builder prune -af 2>/dev/null || true docker system prune -af 2>/dev/null || true } ensure_docker_build_space() { local docker_root local free_mb local min_free_mb min_free_mb="${DOCKER_BUILD_MIN_FREE_MB:-4096}" docker_root="$(docker_root_dir)" free_mb="$(path_available_mb "${docker_root}")" if (( free_mb >= min_free_mb )); then return 0 fi echo "Docker data root ${docker_root} has ${free_mb}MiB free; pruning unused Docker build data..." prune_unused_docker_build_data free_mb="$(path_available_mb "${docker_root}")" if (( free_mb < min_free_mb )); then echo "Docker data root ${docker_root} still has only ${free_mb}MiB free after cleanup." >&2 echo "Free space there or move Docker's data-root to a larger filesystem such as /home before building." >&2 echo "Override the threshold with DOCKER_BUILD_MIN_FREE_MB if this host can build with less space." 
>&2 exit 1 fi } prepare_buildx_builder() { local registry_endpoint="$1" docker run --rm --privileged multiarch/qemu-user-static --reset -p yes cat < "${BUILDX_CONFIG}" [registry."${registry_endpoint}"] http = true insecure = true [registry."127.0.0.1:30500"] http = true insecure = true [registry."localhost:30500"] http = true insecure = true EOF docker buildx rm lab-builder 2>/dev/null || true docker buildx create --name lab-builder --driver docker-container --driver-opt network=host --config "${BUILDX_CONFIG}" --use docker buildx inspect --bootstrap } dump_argocd_debug() { local app="$1" kubectl --kubeconfig "${KUBECONFIG}" -n argocd get application "${app}" -o yaml || true kubectl --kubeconfig "${KUBECONFIG}" -n argocd describe application "${app}" || true kubectl --kubeconfig "${KUBECONFIG}" -n argocd get pods -o wide || true kubectl --kubeconfig "${KUBECONFIG}" -n argocd logs deployment/argocd-repo-server --tail=120 || true kubectl --kubeconfig "${KUBECONFIG}" -n argocd logs statefulset/argocd-application-controller --tail=120 || true } dump_namespace_debug() { local namespace="$1" kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get all -o wide || true kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get pvc -o wide || true kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" describe pods || true kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true } wait_for_namespace() { local namespace="$1" local app="$2" local timeout_seconds="$3" local elapsed=0 until kubectl --kubeconfig "${KUBECONFIG}" get namespace "${namespace}" >/dev/null 2>&1; do if ((elapsed >= timeout_seconds)); then echo "Timed out waiting for namespace ${namespace} from Argo CD app ${app}" >&2 dump_argocd_debug "${app}" exit 1 fi sleep 5 elapsed=$((elapsed + 5)) done } wait_for_namespaced_resource() { local namespace="$1" local kind="$2" local name="$3" local app="$4" local timeout_seconds="$5" local elapsed=0 
until kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get "${kind}/${name}" >/dev/null 2>&1; do if ((elapsed >= timeout_seconds)); then echo "Timed out waiting for ${kind}/${name} in namespace ${namespace} from Argo CD app ${app}" >&2 dump_argocd_debug "${app}" kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true exit 1 fi sleep 5 elapsed=$((elapsed + 5)) done } wait_for_deployment_ready() { local namespace="$1" local deployment="$2" local app="$3" local timeout_seconds="$4" local desired_replicas local ready_replicas local elapsed=0 desired_replicas="$(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)" desired_replicas="${desired_replicas:-1}" until ready_replicas="$(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null)"; \ (( ${ready_replicas:-0} >= desired_replicas )); do if ((elapsed >= timeout_seconds)); then echo "Timed out waiting for deployment/${deployment} in namespace ${namespace} to have ${desired_replicas} ready replicas" >&2 dump_argocd_debug "${app}" dump_namespace_debug "${namespace}" exit 1 fi sleep 5 elapsed=$((elapsed + 5)) done } deploy_gitea() { local mode="${LAB_GITEA_DEPLOY:-true}" local gitea_host="${LAB_GITEA_HOST:-${LAB_RASPBERRY_HOST:-192.168.100.89}}" local gitea_user="${LAB_GITEA_USER:-${LAB_RASPBERRY_USER:-jv}}" local gitea_key="${LAB_GITEA_SSH_KEY_PATH:-${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}}" local install_dir="${LAB_GITEA_INSTALL_DIR:-/opt/homelab-gitea}" local image="${LAB_GITEA_IMAGE:-gitea/gitea:1.21.7}" local http_port="${LAB_GITEA_HTTP_PORT:-3000}" local ssh_port="${LAB_GITEA_SSH_PORT:-32222}" local domain="${LAB_GITEA_DOMAIN:-lab2025.duckdns.org}" local root_url="${LAB_GITEA_ROOT_URL:-https://lab2025.duckdns.org/git/}" local 
container_name="${LAB_GITEA_CONTAINER_NAME:-homelab-gitea}" local compose_file="${REPO_ROOT}/infra/gitea/docker-compose.yml" require_debian_server "deploy-gitea" if disabled_value "${mode}"; then install_gitea_backup_timer return 0 fi if [[ ! -s "${compose_file}" ]]; then echo "Missing ${compose_file}" >&2 exit 1 fi echo "Deploying external Gitea on ${gitea_user}@${gitea_host}:${http_port}..." ssh -i "${gitea_key}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new "${gitea_user}@${gitea_host}" "rm -rf /tmp/homelab-gitea && mkdir -p /tmp/homelab-gitea" scp -i "${gitea_key}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new "${compose_file}" "${gitea_user}@${gitea_host}:/tmp/homelab-gitea/docker-compose.yml" ssh -i "${gitea_key}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new "${gitea_user}@${gitea_host}" "set -eu install_dir='${install_dir}' install_missing_packages() { missing_packages='' for package in \"\$@\"; do if ! dpkg-query -W -f='\${Status}' \"\$package\" 2>/dev/null | grep -q 'install ok installed'; then missing_packages=\"\$missing_packages \$package\" fi done if [ -n \"\$missing_packages\" ]; then sudo apt-get update sudo apt-get install -y --no-install-recommends \$missing_packages fi } install_missing_packages ca-certificates curl iptables if ! command -v docker >/dev/null 2>&1; then curl -fsSL https://get.docker.com | sudo sh fi if ! sudo docker compose version >/dev/null 2>&1; then install_missing_packages docker-compose-plugin fi repair_docker_iptables() { if sudo iptables -t nat -S DOCKER >/dev/null 2>&1; then return 0 fi echo 'Docker NAT chain is missing on the Gitea host; restarting Docker once to restore iptables state...' sudo systemctl restart docker sleep 3 if sudo iptables -t nat -S DOCKER >/dev/null 2>&1; then return 0 fi echo 'Docker NAT chain is still missing after restarting Docker.' 
>&2 sudo iptables -t nat -S >&2 || true sudo systemctl status docker --no-pager -l >&2 || true exit 1 } repair_docker_iptables sudo mkdir -p \"\$install_dir/data\" sudo cp /tmp/homelab-gitea/docker-compose.yml \"\$install_dir/docker-compose.yml\" sudo chown -R 1000:1000 \"\$install_dir/data\" sudo tee \"\$install_dir/.env\" >/dev/null </dev/null 2>&1; then openssl rand -hex 32 return 0 fi python3 - <<'PY' import secrets print(secrets.token_hex(32)) PY } gitea_api_base_url() { local gitea_host="$1" local http_port="$2" local candidate local api_base_override="${LAB_GITEA_API_BASE_URL:-}" if [[ -n "${api_base_override}" ]]; then printf '%s\n' "${api_base_override%/}" return 0 fi for candidate in "http://${gitea_host}:${http_port}/api/v1" "http://${gitea_host}:${http_port}/git/api/v1"; do if curl -fsS "${candidate}/version" >/dev/null 2>&1; then printf '%s\n' "${candidate}" return 0 fi done echo "Could not reach the Gitea API on ${gitea_host}:${http_port}." >&2 exit 1 } gitea_repo_exists() { local api_base="$1" local auth_user="$2" local auth_password="$3" local owner="$4" local repo_name="$5" local status status="$(curl -sS -o /dev/null -w '%{http_code}' -u "${auth_user}:${auth_password}" "${api_base}/repos/${owner}/${repo_name}")" case "${status}" in 200) return 0 ;; 404) return 1 ;; 401 | 403) echo "Gitea API authentication failed for ${auth_user} while checking ${owner}/${repo_name}." >&2 exit 1 ;; *) echo "Unexpected Gitea API response ${status} while checking ${owner}/${repo_name}." 
>&2
      exit 1
      ;;
  esac
}

# Check whether a branch exists in a Gitea repository.
# Arguments: $1 API base URL, $2 user, $3 password, $4 owner,
#            $5 repository, $6 branch
# Returns:   0 on HTTP 200, 1 on HTTP 404. Any other status prints a
#            message to stderr and exits 1.
gitea_branch_exists() {
  local api_base="$1"
  local auth_user="$2"
  local auth_password="$3"
  local owner="$4"
  local repo_name="$5"
  local branch="$6"
  local http_status

  # Only the status code matters; the response body is discarded.
  http_status="$(curl -sS -o /dev/null -w '%{http_code}' -u "${auth_user}:${auth_password}" "${api_base}/repos/${owner}/${repo_name}/branches/${branch}")"

  if [[ "${http_status}" == "200" ]]; then
    return 0
  elif [[ "${http_status}" == "404" ]]; then
    return 1
  elif [[ "${http_status}" == "401" || "${http_status}" == "403" ]]; then
    echo "Gitea API authentication failed for ${auth_user} while checking ${owner}/${repo_name}:${branch}." >&2
    exit 1
  else
    echo "Unexpected Gitea API response ${http_status} while checking ${owner}/${repo_name}:${branch}." >&2
    exit 1
  fi
}

# Create a public, uninitialised repository owned by the authenticated
# user via POST /user/repos.
# Arguments: $1 API base URL, $2 user, $3 password, $4 repository name,
#            $5 default branch
create_gitea_repo() {
  local api_base="$1"
  local auth_user="$2"
  local auth_password="$3"
  local repo_name="$4"
  local default_branch="$5"
  local request_body

  # python3 builds the JSON so the values are escaped correctly.
  request_body="$(python3 - "${repo_name}" "${default_branch}" <<'PY'
import json
import sys

repo_name, default_branch = sys.argv[1:3]
print(json.dumps({
    "name": repo_name,
    "private": False,
    "auto_init": False,
    "default_branch": default_branch,
    "description": "Homelab infrastructure configuration",
}))
PY
)"

  curl --fail --silent --show-error \
    --user "${auth_user}:${auth_password}" \
    --header "Content-Type: application/json" \
    --request POST \
    --data "${request_body}" \
    "${api_base}/user/repos" >/dev/null
}

gitea_public_key_registered() {
  local api_base="$1"
  local auth_user="$2"
  local auth_password="$3"
  local owner="$4"
  local repo_name="$5"
  local public_key_path="$6"
  local repo_keys
  local user_keys

  user_keys="$(curl -fsS -u "${auth_user}:${auth_password}" "${api_base}/user/keys?limit=100")"
  repo_keys="$(curl -fsS -u "${auth_user}:${auth_password}" "${api_base}/repos/${owner}/${repo_name}/keys?limit=100")"

  GITEA_PUBLIC_KEY="$(<"${public_key_path}")" \
  GITEA_USER_KEYS="${user_keys}" \
  GITEA_REPO_KEYS="${repo_keys}" \
    python3 - <<'PY'
import json
import os
import sys

public_key = os.environ["GITEA_PUBLIC_KEY"].strip()
for env_name in ("GITEA_USER_KEYS", "GITEA_REPO_KEYS"):
    for key in json.loads(os.environ[env_name]) or []:
        if key.get("key", "").strip() == \
public_key: sys.exit(0) sys.exit(1) PY } create_gitea_repo_deploy_key() { local api_base="$1" local auth_user="$2" local auth_password="$3" local owner="$4" local repo_name="$5" local title="$6" local public_key_path="$7" local read_only="$8" local payload payload="$( GITEA_DEPLOY_KEY_TITLE="${title}" \ GITEA_PUBLIC_KEY="$(<"${public_key_path}")" \ GITEA_DEPLOY_KEY_READ_ONLY="${read_only}" \ python3 - <<'PY' import json import os print(json.dumps({ "title": os.environ["GITEA_DEPLOY_KEY_TITLE"], "key": os.environ["GITEA_PUBLIC_KEY"].strip(), "read_only": os.environ["GITEA_DEPLOY_KEY_READ_ONLY"] == "true", })) PY )" curl -fsS \ -u "${auth_user}:${auth_password}" \ -H "Content-Type: application/json" \ -X POST \ -d "${payload}" \ "${api_base}/repos/${owner}/${repo_name}/keys" >/dev/null } ensure_gitea_repo_ssh_access() { local api_base="$1" local auth_user="$2" local auth_password="$3" local owner="$4" local repo_name="$5" local ssh_host="$6" local ssh_port="$7" local key_path="$8" local key_title="$9" local key_read_only="${10}" local key_dir local known_hosts local public_key_path local read_only_json="false" local ssh_repo_url if [[ "${key_path}" =~ [[:space:]] || "${key_path}" == *"'"* ]]; then echo "LAB_GITEA_REPO_SSH_KEY_PATH cannot contain whitespace or single quotes." >&2 exit 1 fi key_dir="$(dirname "${key_path}")" public_key_path="${key_path}.pub" mkdir -p "${key_dir}" chmod 0700 "${key_dir}" if [[ ! -s "${key_path}" && ! -s "${public_key_path}" ]]; then ssh-keygen -t ed25519 -N "" -f "${key_path}" -C "${key_title}" >/dev/null elif [[ -s "${key_path}" && ! -s "${public_key_path}" ]]; then ssh-keygen -y -f "${key_path}" >"${public_key_path}" elif [[ ! -s "${key_path}" ]]; then echo "Public key ${public_key_path} exists, but private key ${key_path} is missing." 
>&2 exit 1 fi chmod 0600 "${key_path}" chmod 0644 "${public_key_path}" if truthy "${key_read_only}"; then read_only_json="true" fi if gitea_public_key_registered "${api_base}" "${auth_user}" "${auth_password}" "${owner}" "${repo_name}" "${public_key_path}"; then echo "Gitea already has Debian host SSH key ${public_key_path}." else create_gitea_repo_deploy_key "${api_base}" "${auth_user}" "${auth_password}" "${owner}" "${repo_name}" "${key_title}" "${public_key_path}" "${read_only_json}" echo "Added Debian host SSH key ${public_key_path} to ${owner}/${repo_name}." fi known_hosts="${HOME}/.ssh/known_hosts" touch "${known_hosts}" chmod 0644 "${known_hosts}" if ! ssh-keygen -F "[${ssh_host}]:${ssh_port}" -f "${known_hosts}" >/dev/null 2>&1; then ssh-keyscan -p "${ssh_port}" "${ssh_host}" >>"${known_hosts}" 2>/dev/null fi ssh_repo_url="ssh://git@${ssh_host}:${ssh_port}/${owner}/${repo_name}.git" git -C "${REPO_ROOT}" remote set-url gitea "${ssh_repo_url}" 2>/dev/null || git -C "${REPO_ROOT}" remote add gitea "${ssh_repo_url}" git -C "${REPO_ROOT}" config core.sshCommand "ssh -i ${key_path} -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new" git -C "${REPO_ROOT}" ls-remote gitea HEAD >/dev/null echo "Gitea SSH remote: ${ssh_repo_url}" } bootstrap_gitea_repo() { local mode="${LAB_GITEA_REPO_BOOTSTRAP:-true}" local gitea_host="${LAB_GITEA_HOST:-${LAB_RASPBERRY_HOST:-192.168.100.89}}" local gitea_user="${LAB_GITEA_USER:-${LAB_RASPBERRY_USER:-jv}}" local gitea_key="${LAB_GITEA_SSH_KEY_PATH:-${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}}" local container_name="${LAB_GITEA_CONTAINER_NAME:-homelab-gitea}" local http_port="${LAB_GITEA_HTTP_PORT:-3000}" local ssh_port="${LAB_GITEA_SSH_PORT:-32222}" local root_url="${LAB_GITEA_ROOT_URL:-https://lab2025.duckdns.org/git/}" local repo_owner="${LAB_GITEA_REPO_OWNER:-jv}" local repo_name="${LAB_GITEA_REPO_NAME:-my-homelab-configs}" local default_branch="${LAB_GITEA_REPO_DEFAULT_BRANCH:-main}" local 
bootstrap_user="${LAB_GITEA_BOOTSTRAP_USER:-${repo_owner}}" local bootstrap_email="${LAB_GITEA_BOOTSTRAP_EMAIL:-${bootstrap_user}@homelab.local}" local credentials_file="${LAB_GITEA_BOOTSTRAP_CREDENTIALS_FILE:-${HOME}/.config/homelab/gitea-bootstrap.env}" local bootstrap_password="${LAB_GITEA_BOOTSTRAP_PASSWORD:-}" local allow_dirty="${LAB_GITEA_BOOTSTRAP_ALLOW_DIRTY:-false}" local ssh_bootstrap="${LAB_GITEA_REPO_SSH_BOOTSTRAP:-true}" local ssh_key_path="${LAB_GITEA_REPO_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" local ssh_key_title="${LAB_GITEA_REPO_DEPLOY_KEY_TITLE:-debian-host-${repo_name}}" local ssh_key_read_only="${LAB_GITEA_REPO_DEPLOY_KEY_READ_ONLY:-false}" local api_base local public_repo_url local direct_repo_url local push_url local askpass local credentials_dir local remote_status local worktree_status require_debian_server "bootstrap-gitea-repo" if disabled_value "${mode}"; then return 0 fi ensure_python3 for value_name in repo_owner repo_name default_branch bootstrap_user; do local value="${!value_name}" if ! [[ "${value}" =~ ^[A-Za-z0-9_.-]+$ ]]; then echo "${value_name} contains unsupported characters." >&2 exit 1 fi done if [[ "${bootstrap_email}" == *"'"* ]]; then echo "LAB_GITEA_BOOTSTRAP_EMAIL cannot contain a single quote." >&2 exit 1 fi if ! [[ "${ssh_port}" =~ ^[0-9]+$ ]]; then echo "LAB_GITEA_SSH_PORT must be numeric." 
>&2 exit 1 fi if [[ -z "${bootstrap_password}" && -r "${credentials_file}" ]]; then # shellcheck disable=SC1090 source "${credentials_file}" bootstrap_user="${GITEA_BOOTSTRAP_USER:-${bootstrap_user}}" bootstrap_email="${GITEA_BOOTSTRAP_EMAIL:-${bootstrap_email}}" bootstrap_password="${GITEA_BOOTSTRAP_PASSWORD:-}" fi if [[ -z "${bootstrap_password}" ]]; then bootstrap_password="$(gitea_bootstrap_password)" credentials_dir="$(dirname "${credentials_file}")" mkdir -p "${credentials_dir}" chmod 0700 "${credentials_dir}" { printf "GITEA_BOOTSTRAP_USER='%s'\n" "${bootstrap_user}" printf "GITEA_BOOTSTRAP_EMAIL='%s'\n" "${bootstrap_email}" printf "GITEA_BOOTSTRAP_PASSWORD='%s'\n" "${bootstrap_password}" } > "${credentials_file}" chmod 0600 "${credentials_file}" echo "Generated Gitea bootstrap credentials at ${credentials_file}." fi for value_name in repo_owner repo_name default_branch bootstrap_user; do local value="${!value_name}" if ! [[ "${value}" =~ ^[A-Za-z0-9_.-]+$ ]]; then echo "${value_name} contains unsupported characters." >&2 exit 1 fi done for value_name in bootstrap_email bootstrap_password; do local value="${!value_name}" if [[ "${value}" == *"'"* ]]; then echo "${value_name} cannot contain a single quote." >&2 exit 1 fi done echo "Bootstrapping Gitea repository ${repo_owner}/${repo_name}..." # shellcheck disable=SC2087 ssh -i "${gitea_key}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new "${gitea_user}@${gitea_host}" "bash -s" </dev/null 2>&1; then echo "Gitea container \${container_name} is not running on ${gitea_host}." >&2 exit 1 fi for attempt in \$(seq 1 60); do if curl -fsS http://127.0.0.1:3000/api/v1/version >/dev/null 2>&1 || curl -fsS http://127.0.0.1:3000/git/api/v1/version >/dev/null 2>&1; then break fi if [ "\${attempt}" = "60" ]; then echo "Timed out waiting for Gitea API inside \${container_name}." >&2 exit 1 fi sleep 2 done if ! 
sudo docker exec -u git "\${container_name}" gitea -c /data/gitea/conf/app.ini admin user create \ --username "\${bootstrap_user}" \ --password "\${bootstrap_password}" \ --email "\${bootstrap_email}" \ --admin \ --must-change-password=false >/tmp/homelab-gitea-user-create.log 2>&1; then if ! sudo docker exec -u git "\${container_name}" gitea -c /data/gitea/conf/app.ini admin user list | awk -v user="\${bootstrap_user}" 'NR > 1 && \$2 == user { found = 1 } END { exit found ? 0 : 1 }'; then cat /tmp/homelab-gitea-user-create.log >&2 exit 1 fi fi EOF api_base="$(gitea_api_base_url "${gitea_host}" "${http_port}")" if gitea_repo_exists "${api_base}" "${bootstrap_user}" "${bootstrap_password}" "${repo_owner}" "${repo_name}"; then echo "Gitea repository ${repo_owner}/${repo_name} already exists." else if [[ "${repo_owner}" != "${bootstrap_user}" ]]; then echo "Gitea repository owner ${repo_owner} does not exist yet; only user-owned bootstrap repos are supported." >&2 exit 1 fi create_gitea_repo "${api_base}" "${bootstrap_user}" "${bootstrap_password}" "${repo_name}" "${default_branch}" echo "Created Gitea repository ${repo_owner}/${repo_name}." fi public_repo_url="${root_url%/}/${repo_owner}/${repo_name}.git" if [[ "${api_base}" == */git/api/v1 ]]; then direct_repo_url="http://${gitea_host}:${http_port}/git/${repo_owner}/${repo_name}.git" else direct_repo_url="http://${gitea_host}:${http_port}/${repo_owner}/${repo_name}.git" fi push_url="${LAB_GITEA_BOOTSTRAP_PUSH_URL:-${direct_repo_url}}" git -C "${REPO_ROOT}" rev-parse --is-inside-work-tree >/dev/null git -C "${REPO_ROOT}" remote set-url gitea "${public_repo_url}" 2>/dev/null || git -C "${REPO_ROOT}" remote add gitea "${public_repo_url}" if gitea_branch_exists "${api_base}" "${bootstrap_user}" "${bootstrap_password}" "${repo_owner}" "${repo_name}" "${default_branch}"; then echo "Gitea branch ${default_branch} already exists; leaving existing history unchanged." 
else
  worktree_status="$(git -C "${REPO_ROOT}" status --porcelain)"
  if [[ -n "${worktree_status}" ]] && ! truthy "${allow_dirty}"; then
    echo "Refusing to seed Gitea from a dirty working tree; commit or stash changes first." >&2
    echo "Set LAB_GITEA_BOOTSTRAP_ALLOW_DIRTY=true to push committed HEAD anyway." >&2
    exit 1
  fi
  askpass="$(mktemp)"
  trap 'rm -f "${askpass}" "${BUILDX_CONFIG}"' EXIT
  # NOTE(review): the next line appears truncated in this view (heredoc bodies
  # seem to be missing between the redirections) - confirm against the
  # repository copy before relying on anything up to BACKUP_SCRIPT_EOT.
  cat > "${askpass}" </dev/null </dev/null 2>&1 || true" >/dev/null 2>&1 || true
}
trap cleanup EXIT

ssh_gitea "set -eu
sudo docker exec -u git '\${GITEA_CONTAINER}' rm -f '\${REMOTE_ARCHIVE}' >/dev/null 2>&1 || true
sudo docker exec -u git '\${GITEA_CONTAINER}' sh -c 'mkdir -p /data/git/repositories'
sudo docker exec -u git '\${GITEA_CONTAINER}' gitea dump -c /data/gitea/conf/app.ini --file '\${REMOTE_ARCHIVE}'
sudo docker cp '\${GITEA_CONTAINER}:\${REMOTE_ARCHIVE}' '\${remote_host_archive}'
sudo chown '\${GITEA_USER}:\${GITEA_USER}' '\${remote_host_archive}'
sudo docker exec -u git '\${GITEA_CONTAINER}' rm -f '\${REMOTE_ARCHIVE}' >/dev/null 2>&1 || true"

scp -i "\${GITEA_SSH_KEY_PATH}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new \
  "\${GITEA_USER}@\${GITEA_HOST}:\${remote_host_archive}" "\${tmp_archive}"

sudo mkdir -p "\${GITEA_BACKUP_DIR}"
sudo chown jv:jv "\${GITEA_BACKUP_DIR}"
sudo install -m 0640 -o jv -g jv "\${tmp_archive}" "\${backup_archive}"
sudo find "\${GITEA_BACKUP_DIR}" -type f -name 'gitea-*.zip' -mtime +"\${GITEA_BACKUP_RETENTION_DAYS}" -delete

echo "Created \${backup_archive}"
BACKUP_SCRIPT_EOT
  sudo chmod 0755 "${backup_script}"

  # systemd service + timer that run the generated backup script.
  sudo tee /etc/systemd/system/homelab-gitea-backup.service >/dev/null <<'SERVICE_EOT'
[Unit]
Description=Back up external Homelab Gitea to Debian host storage
After=network-online.target
Wants=network-online.target

[Service]
Type=oneshot
ExecStart=/usr/local/sbin/homelab-gitea-backup.sh
SERVICE_EOT

  sudo tee /etc/systemd/system/homelab-gitea-backup.timer >/dev/null <<'TIMER_EOT'
[Unit]
Description=Run daily Homelab Gitea backups

[Timer]
OnCalendar=*-*-* 02:35:00
RandomizedDelaySec=20m
Persistent=true

[Install]
WantedBy=timers.target
TIMER_EOT

  # Restore drill: extracts the newest backup archive into a scratch directory,
  # checks it, and stores a small text report. The quoted delimiter keeps every
  # ${...} literal so it is evaluated when the generated script runs.
  sudo tee "${restore_drill_script}" >/dev/null <<'RESTORE_DRILL_SCRIPT_EOT'
#!/usr/bin/env bash
set -euo pipefail

GITEA_BACKUP_DIR="${GITEA_BACKUP_DIR:-/home/jv/backups/gitea}"
GITEA_RESTORE_DRILL_DIR="${GITEA_RESTORE_DRILL_DIR:-/home/jv/backups/gitea-restore-drills}"
GITEA_RESTORE_DRILL_RETENTION_DAYS="${GITEA_RESTORE_DRILL_RETENTION_DAYS:-90}"

if ! command -v python3 >/dev/null 2>&1; then
  echo "python3 is required for Gitea restore drills." >&2
  exit 1
fi

latest_archive="$(
  { find "${GITEA_BACKUP_DIR}" -maxdepth 1 -type f -name 'gitea-*.zip' -printf '%T@ %p\n' 2>/dev/null || true; } |
    sort -nr |
    awk 'NR == 1 { sub(/^[^ ]+ /, ""); print }'
)"

if [[ -z "${latest_archive}" ]]; then
  echo "Skipping Gitea restore drill: no backup archive found in ${GITEA_BACKUP_DIR}."
  exit 0
fi

timestamp="$(date -u +%Y%m%dT%H%M%SZ)"
tmp_dir="$(mktemp -d "/tmp/gitea-restore-drill-${timestamp}.XXXXXX")"
tmp_report="$(mktemp "/tmp/gitea-restore-drill-${timestamp}.XXXXXX.txt")"
report_path="${GITEA_RESTORE_DRILL_DIR}/gitea-restore-drill-${timestamp}.txt"

cleanup() {
  rm -rf "${tmp_dir}"
  rm -f "${tmp_report}"
}
trap cleanup EXIT

python3 - "${latest_archive}" "${tmp_dir}" "${tmp_report}" <<'PY'
import os
import sys
import zipfile

archive_path, extract_dir, report_path = sys.argv[1:4]

with zipfile.ZipFile(archive_path) as archive:
    bad_member = archive.testzip()
    if bad_member:
        raise SystemExit(f"ZIP integrity check failed at {bad_member}")

    members = archive.infolist()
    if not members:
        raise SystemExit("ZIP archive is empty")

    extract_root = os.path.abspath(extract_dir)
    for member in members:
        target = os.path.abspath(os.path.join(extract_root, member.filename))
        if target != extract_root and not target.startswith(extract_root + os.sep):
            raise SystemExit(f"Unsafe archive path: {member.filename}")

    archive.extractall(extract_root)

file_count = 0
total_bytes = 0
for root, _, files in os.walk(extract_dir):
    for name in files:
        file_count += 1
        total_bytes += os.path.getsize(os.path.join(root, name))

if file_count == 0:
    raise SystemExit("Archive extracted no files")

with open(report_path, "w", encoding="utf-8") as handle:
    handle.write("Gitea restore drill report\n")
    handle.write(f"archive={archive_path}\n")
    handle.write(f"archive_size_bytes={os.path.getsize(archive_path)}\n")
    handle.write(f"extracted_files={file_count}\n")
    handle.write(f"extracted_bytes={total_bytes}\n")
    handle.write("result=ok\n")
PY

sudo mkdir -p "${GITEA_RESTORE_DRILL_DIR}"
sudo install -m 0640 -o root -g root "${tmp_report}" "${report_path}"
sudo find "${GITEA_RESTORE_DRILL_DIR}" -type f -name 'gitea-restore-drill-*.txt' -mtime +"${GITEA_RESTORE_DRILL_RETENTION_DAYS}" -delete

echo "Created ${report_path}"
RESTORE_DRILL_SCRIPT_EOT
  sudo chmod 0755 "${restore_drill_script}"

  sudo tee /etc/systemd/system/homelab-gitea-restore-drill.service >/dev/null <<'RESTORE_DRILL_SERVICE_EOT'
[Unit]
Description=Run a non-destructive Gitea backup restore drill
After=network-online.target homelab-gitea-backup.service
Wants=network-online.target

[Service]
Type=oneshot
ExecStart=/usr/local/sbin/homelab-gitea-restore-drill.sh
RESTORE_DRILL_SERVICE_EOT

  sudo tee /etc/systemd/system/homelab-gitea-restore-drill.timer >/dev/null <<'RESTORE_DRILL_TIMER_EOT'
[Unit]
Description=Run monthly Homelab Gitea restore drills

[Timer]
OnCalendar=monthly
RandomizedDelaySec=2h
Persistent=true

[Install]
WantedBy=timers.target
RESTORE_DRILL_TIMER_EOT

  sudo systemctl daemon-reload
  sudo systemctl enable --now homelab-gitea-backup.timer >/dev/null
  sudo systemctl enable --now homelab-gitea-restore-drill.timer >/dev/null
}

# backup-gitea: (re)install the backup units, then run one backup immediately.
backup_gitea() {
  require_debian_server "backup-gitea"
  install_gitea_backup_timer
  sudo /usr/local/sbin/homelab-gitea-backup.sh
}

# drill-gitea-restore: (re)install the backup units, then run one restore
# drill immediately.
drill_gitea_restore() {
  require_debian_server "drill-gitea-restore"
  install_gitea_backup_timer
  sudo /usr/local/sbin/homelab-gitea-restore-drill.sh
}

# install-gitea-runner: install the act_runner binary, register it once, and
# (re)install its systemd service.
# Arguments:
#   $1 - optional registration token; GITEA_RUNNER_REGISTRATION_TOKEN wins
#        when both are provided.
# Environment (all optional, defaults below): GITEA_RUNNER_HOME,
#   GITEA_RUNNER_INSTANCE_URL, GITEA_RUNNER_LABELS, GITEA_RUNNER_NAME,
#   GITEA_RUNNER_USER, GITEA_ACT_RUNNER_VERSION.
# Exits 1 on an unsupported architecture, a failed download/install, or when
# registration is needed and no token is available.
install_gitea_runner() {
  local runner_arch
  local runner_download
  local runner_home="${GITEA_RUNNER_HOME:-/home/jv/.local/share/gitea-runner/my-homelab-configs}"
  local runner_instance="${GITEA_RUNNER_INSTANCE_URL:-https://lab2025.duckdns.org/git/}"
  local runner_labels="${GITEA_RUNNER_LABELS:-homelab-debian:host}"
  local runner_name="${GITEA_RUNNER_NAME:-homelab-debian-my-homelab-configs}"
  local runner_token="${GITEA_RUNNER_REGISTRATION_TOKEN:-${1:-}}"
  local runner_user="${GITEA_RUNNER_USER:-jv}"
  local runner_version="${GITEA_ACT_RUNNER_VERSION:-0.2.11}"
  local missing_packages=()
  local package

  require_debian_server "install-gitea-runner"

  case "$(dpkg --print-architecture)" in
    amd64)
      runner_arch="linux-amd64"
      ;;
    arm64)
      runner_arch="linux-arm64"
      ;;
    *)
      echo "Unsupported Debian architecture: $(dpkg --print-architecture)" >&2
      exit 1
      ;;
  esac

  # Only touch apt when one of the required packages is actually missing.
  for package in ca-certificates curl git nodejs python3; do
    if ! dpkg-query -W -f='${Status}' "$package" 2>/dev/null | grep -q "install ok installed"; then
      missing_packages+=("$package")
    fi
  done

  if [[ ${#missing_packages[@]} -gt 0 ]]; then
    sudo apt-get update
    sudo apt-get install -y --no-install-recommends "${missing_packages[@]}"
  fi

  # Download to a temporary file and install it in one step. Writing straight
  # to /usr/local/bin/act_runner fails with "Text file busy" when that binary
  # is currently executing, and an interrupted download would leave a partial
  # binary at the final path.
  runner_download="$(mktemp)"
  if curl -fsSL \
    -o "${runner_download}" \
    "https://gitea.com/gitea/act_runner/releases/download/v${runner_version}/act_runner-${runner_version}-${runner_arch}" \
    && sudo install -m 0755 -o root -g root "${runner_download}" /usr/local/bin/act_runner; then
    rm -f "${runner_download}"
  else
    rm -f "${runner_download}"
    echo "Failed to download or install act_runner v${runner_version} (${runner_arch})." >&2
    exit 1
  fi

  sudo -u "${runner_user}" mkdir -p "${runner_home}"

  # A .runner file in the runner home is treated as an existing registration.
  if [[ ! -f "${runner_home}/.runner" ]]; then
    if [[ -z "${runner_token}" ]]; then
      echo "Set GITEA_RUNNER_REGISTRATION_TOKEN to the repository-level runner token from Gitea." >&2
      exit 1
    fi

    # Values travel through the environment and are expanded by the inner
    # shell, so they are never spliced into the command string.
    sudo -u "${runner_user}" env \
      HOME="/home/${runner_user}" \
      GITEA_RUNNER_HOME="${runner_home}" \
      GITEA_RUNNER_INSTANCE_URL="${runner_instance}" \
      GITEA_RUNNER_REGISTRATION_TOKEN="${runner_token}" \
      GITEA_RUNNER_NAME="${runner_name}" \
      GITEA_RUNNER_LABELS="${runner_labels}" \
      bash -lc 'cd "${GITEA_RUNNER_HOME}" && /usr/local/bin/act_runner register --no-interactive --instance "${GITEA_RUNNER_INSTANCE_URL}" --token "${GITEA_RUNNER_REGISTRATION_TOKEN}" --name "${GITEA_RUNNER_NAME}" --labels "${GITEA_RUNNER_LABELS}"'
  else
    echo "Existing runner registration found at ${runner_home}/.runner; keeping it."
  fi

  # NOTE(review): the unit-file heredoc and the commands that followed it
  # appear to be missing from this view; as written this line would write an
  # empty unit file. Confirm against the repository copy.
  sudo tee /etc/systemd/system/homelab-gitea-runner.service >/dev/null </dev/null
  sudo systemctl status homelab-gitea-runner.service --no-pager -l
}

# Delete every pod matching a label selector and wait for the deletion.
# Arguments:
#   $1 - namespace
#   $2 - label selector
#   $3 - Argo CD application name passed to dump_argocd_debug on failure
# Uses KUBECONFIG when set, otherwise KUBECONFIG_PATH, so the function also
# works under `set -u` when KUBECONFIG has not been exported.
# Exits 1 after printing debug output if the delete fails or times out.
recreate_pods_for_selector() {
  local namespace="$1"
  local selector="$2"
  local app="$3"
  local kubeconfig="${KUBECONFIG:-${KUBECONFIG_PATH}}"

  if ! kubectl --kubeconfig "${kubeconfig}" -n "${namespace}" delete pod -l "${selector}" --ignore-not-found --wait=true --timeout=120s; then
    echo "Failed to recreate pods matching ${selector} in namespace ${namespace}" >&2
    dump_argocd_debug "${app}"
    dump_namespace_debug "${namespace}"
    exit 1
  fi
}

# Set the argocd.argoproj.io/refresh=hard annotation on an Application in the
# argocd namespace.
# Arguments:
#   $1 - Application name
# Uses KUBECONFIG when set, otherwise KUBECONFIG_PATH.
refresh_argocd_application() {
  local app="$1"
  local kubeconfig="${KUBECONFIG:-${KUBECONFIG_PATH}}"

  kubectl --kubeconfig "${kubeconfig}" patch application "${app}" -n argocd --type merge -p '{"metadata":{"annotations":{"argocd.argoproj.io/refresh":"hard"}}}' >/dev/null
}

# apps: apply the bootstrap/apps stack, build and push the website and demos
# images when the *_image_is_current checks report them stale, and restart
# pods only for images that were rebuilt.
# Environment (optional): DEMOS_IMAGE_PLATFORMS, WEBSITE_IMAGE_PLATFORMS
#   (default linux/arm64), TF_VAR_registry_endpoint, TF_VAR_kubeconfig_path.
# Exports TF_VAR_registry_endpoint, TF_VAR_kubeconfig_path and KUBECONFIG.
apps() {
  local buildx_builder_ready=false
  local demos_image_built=false
  local demos_image_ref
  local demos_image_state_file
  local demos_platforms
  local demos_registry_endpoint
  local demos_source_hash
  local registry_endpoint
  local website_image_built=false
  local website_image_ref
  local website_image_state_file
  local website_platforms
  local website_source_hash

  require_debian_server "apps"

  registry_endpoint="$(website_registry_endpoint)"
  demos_registry_endpoint="$(demos_registry_endpoint)"
  demos_image_ref="${registry_endpoint}/demos-static:latest"
  demos_image_state_file="${REPO_ROOT}/.lab/demos-static-image.state"
  demos_platforms="${DEMOS_IMAGE_PLATFORMS:-linux/arm64}"
  demos_source_hash="$(demos_source_hash)"
  website_image_ref="${registry_endpoint}/php-website:latest"
  website_image_state_file="${REPO_ROOT}/.lab/php-website-image.state"
  website_platforms="${WEBSITE_IMAGE_PLATFORMS:-linux/arm64}"
  website_source_hash="$(website_source_hash)"

  export TF_VAR_registry_endpoint="${TF_VAR_registry_endpoint:-${registry_endpoint}}"
  export TF_VAR_kubeconfig_path="${TF_VAR_kubeconfig_path:-${KUBECONFIG_PATH}}"
  export KUBECONFIG="${TF_VAR_kubeconfig_path}"

  # Both app manifests and the OpenTofu variable must name the same registry.
  if [[ "${TF_VAR_registry_endpoint}" != "${registry_endpoint}" ]]; then
    echo "TF_VAR_registry_endpoint must match apps/website/web-app.yaml (${registry_endpoint})" >&2
    exit 1
  fi

  if [[ "${demos_registry_endpoint}" != "${registry_endpoint}" ]]; then
    echo "apps/demos-static/web-app.yaml registry endpoint (${demos_registry_endpoint}) must match apps/website/web-app.yaml (${registry_endpoint})" >&2
    exit 1
  fi

  echo "Deploying homelab applications..."
  run_tofu_stack "bootstrap/apps"

  refresh_argocd_application container-registry
  refresh_argocd_application demos-static
  refresh_argocd_application website-production

  # The in-cluster registry has to be ready before any image can be pushed.
  wait_for_namespace container-registry container-registry 300
  wait_for_namespaced_resource container-registry deployment local-registry container-registry 300
  wait_for_deployment_ready container-registry local-registry container-registry 300

  if website_image_is_current "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}" "${registry_endpoint}"; then
    echo "Website image ${website_image_ref} is already current (${website_source_hash}); skipping build."
  else
    echo "Building website image ${website_image_ref} for ${website_platforms} (${website_source_hash})..."
    ensure_docker_build_space
    # The buildx builder is prepared at most once per run, on first need.
    if [[ "${buildx_builder_ready}" != "true" ]]; then
      prepare_buildx_builder "${registry_endpoint}"
      buildx_builder_ready=true
    fi
    docker buildx build \
      --network host \
      --platform "${website_platforms}" \
      --provenance=false \
      --sbom=false \
      --label "dev.homelab.website.source-hash=${website_source_hash}" \
      -t "${website_image_ref}" \
      -f "${REPO_ROOT}/apps/website/Dockerfile" \
      "${REPO_ROOT}/apps/website/" \
      --push
    website_image_built=true
  fi

  if demos_image_is_current "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}" "${registry_endpoint}"; then
    echo "Demos image ${demos_image_ref} is already current (${demos_source_hash}); skipping build."
  else
    echo "Building demos image ${demos_image_ref} for ${demos_platforms} (${demos_source_hash})..."
    ensure_docker_build_space
    if [[ "${buildx_builder_ready}" != "true" ]]; then
      prepare_buildx_builder "${registry_endpoint}"
      buildx_builder_ready=true
    fi
    docker buildx build \
      --network host \
      --platform "${demos_platforms}" \
      --provenance=false \
      --sbom=false \
      --label "dev.homelab.demos.source-hash=${demos_source_hash}" \
      -t "${demos_image_ref}" \
      -f "${REPO_ROOT}/apps/demos-static/Dockerfile" \
      "${REPO_ROOT}/apps/demos-static/" \
      --push
    demos_image_built=true
  fi

  refresh_argocd_application website-production
  wait_for_namespace website-production website-production 300
  wait_for_namespaced_resource website-production deployment php-website-deployment website-production 300
  if [[ "${website_image_built}" == "true" ]]; then
    recreate_pods_for_selector website-production app=php-website website-production
  else
    echo "Skipping website pod restart because the image did not change."
  fi
  wait_for_deployment_ready website-production php-website-deployment website-production 300
  # The state file is written only after the deployment is ready again.
  if [[ "${website_image_built}" == "true" ]]; then
    write_website_image_state "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}"
  fi

  refresh_argocd_application demos-static
  wait_for_namespace demos-static demos-static 300
  wait_for_namespaced_resource demos-static deployment demos-static demos-static 300
  if [[ "${demos_image_built}" == "true" ]]; then
    recreate_pods_for_selector demos-static app=demos-static demos-static
  else
    echo "Skipping demos pod restart because the image did not change."
  fi
  wait_for_deployment_ready demos-static demos-static demos-static 300
  if [[ "${demos_image_built}" == "true" ]]; then
    write_demos_image_state "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}"
  fi

  echo "Application deployment successfully completed."
}

# Private helper shared by up and rebuild_cluster: runs the pimox and openwrt
# pipelines, prepares the worker var file unless LAB_CLUSTER_VAR_FILE is set,
# then applies cluster -> platform -> apps -> edge in that order.
_deploy_cluster_and_apps() {
  run_pimox_pipeline
  run_openwrt_pipeline

  if [[ -z "${LAB_CLUSTER_VAR_FILE:-}" ]]; then
    prepare_cluster_worker_var_file true
  fi

  run_tofu_stack "bootstrap/cluster"
  run_tofu_stack "bootstrap/platform"
  apps
  run_tofu_stack "bootstrap/edge"
}

# up: deploy Gitea, seed its repository, then deploy the whole cluster stack.
up() {
  require_debian_server "up"
  echo "Deploying the homelab infrastructure..."

  deploy_gitea
  bootstrap_gitea_repo
  _deploy_cluster_and_apps

  echo "Deployment successfully completed."
}

# rebuild-cluster: nuke the cluster, then redeploy it. Gitea is neither
# redeployed nor re-seeded here.
rebuild_cluster() {
  require_debian_server "rebuild-cluster"
  export WORKER_SSH_TARGETS="${WORKER_SSH_TARGETS:-}"
  echo "Rebuilding the Kubernetes cluster without touching external Raspberry Pi Gitea..."

  nuke
  _deploy_cluster_and_apps

  echo "Cluster rebuild successfully completed."
}

# nuke: tear down Kubernetes on this host and on every worker listed in
# WORKER_SSH_TARGETS (whitespace-separated ssh targets), remove the buildx
# builder, and delete the local OpenTofu state of all four stacks.
# Exits 1 - before any state is deleted - if a remote cleanup fails.
nuke() {
  local stack
  local stack_dir
  local target
  local worker_ssh_targets
  local worker_targets

  require_debian_server "nuke"
  echo "Brutally nuking the homelab infrastructure..."

  worker_ssh_targets="${WORKER_SSH_TARGETS-}"
  read -r -a worker_targets <<< "${worker_ssh_targets}"

  echo "--> Terminating local OpenTofu tasks..."
  killall tofu terraform 2>/dev/null || true

  echo "--> Eviscerating local Kubernetes components..."
  cleanup_node
  sudo rm -f "${KUBECONFIG_PATH}"

  for target in "${worker_targets[@]}"; do
    echo "--> Eviscerating remote Kubernetes components (${target})..."
    # The quoted heredoc is sent verbatim and executed by bash on the worker.
    if ! ssh -o ConnectTimeout=5 "${target}" "bash -s" <<'EOF'
set -euo pipefail

cleanup_calico_links() {
  ip link show | awk -F: '/^[0-9]+: cali/ {print $2}' | cut -d@ -f1 | xargs -r -n1 sudo ip link delete 2>/dev/null || true
  sudo ip link delete vxlan.calico 2>/dev/null || true
  sudo ip link delete tunl0 2>/dev/null || true
  sudo ip link delete cni0 2>/dev/null || true
  sudo ip link delete kube-ipvs0 2>/dev/null || true
  ip netns list | awk '/^(cni-|calico)/ {print $1}' | xargs -r -n1 sudo ip netns delete 2>/dev/null || true
}

cleanup_iptables() {
  sudo iptables -F || true
  sudo iptables -X || true
  sudo iptables -t nat -F || true
  sudo iptables -t nat -X || true
  sudo iptables -t mangle -F || true
  sudo iptables -t mangle -X || true
  sudo iptables -t raw -F || true
  sudo iptables -t raw -X || true
  if command -v ipvsadm >/dev/null 2>&1; then
    sudo ipvsadm --clear || true
  fi
}

cleanup_calico_runtime_files() {
  local path

  for path in /run/calico /var/run/calico; do
    if sudo test -e "${path}"; then
      sudo find "${path}" -path '*/cgroup*' -prune -o -mindepth 1 -exec rm -rf -- {} + 2>/dev/null || true
      sudo rmdir "${path}" 2>/dev/null || true
    fi
  done
}

restore_node_dns() {
  sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf
  if sudo test -e /etc/resolv.conf.homelab-k8s-backup; then
    sudo rm -f /etc/resolv.conf
    sudo mv /etc/resolv.conf.homelab-k8s-backup /etc/resolv.conf
  fi
  sudo systemctl restart systemd-resolved 2>/dev/null || true
}

cleanup_mounts() {
  local mountpoint

  if command -v findmnt >/dev/null 2>&1; then
    local mount_root
    while IFS= read -r mountpoint; do
      sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
    done < <(
      for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do
        findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true
      done | sort -ru
    )
  fi

  while IFS= read -r mountpoint; do
    sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
  done < <(find /var/lib/kubelet/pods -mindepth 2 -maxdepth 5 -type d 2>/dev/null || true)

  sudo umount -f /var/lib/containerd/srun/* 2>/dev/null || sudo umount -l /var/lib/containerd/srun/* 2>/dev/null || true
}

sudo kubeadm reset --force || true
sudo systemctl stop kubelet 2>/dev/null || true
sudo systemctl stop containerd 2>/dev/null || true
sudo killall containerd-shim-runc-v2 2>/dev/null || true
cleanup_mounts
sudo rm -rf \
  /etc/kubernetes/ \
  /var/lib/etcd/ \
  /var/lib/kubelet/ \
  /var/lib/cni/ \
  /etc/cni/net.d \
  /run/flannel \
  /var/lib/calico \
  /var/log/calico \
  /var/lib/containerd/* \
  /run/containerd/* \
  /etc/containerd/certs.d \
  /etc/containerd/config.toml
cleanup_calico_runtime_files
sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam
cleanup_iptables
cleanup_calico_links
restore_node_dns
sudo mkdir -p /etc/containerd/certs.d
sudo systemctl reset-failed kubelet containerd 2>/dev/null || true
sudo systemctl start containerd 2>/dev/null || true
EOF
    then
      echo "Remote cleanup failed for ${target}; not deleting OpenTofu state." >&2
      exit 1
    fi
  done

  docker buildx rm lab-builder 2>/dev/null || true
  docker rm -f buildx_buildkit_lab-builder0 2>/dev/null || true
  rm -f "${BUILDX_CONFIG}" || true

  echo "--> Deleting OpenTofu tracking state files..."
  # ${REPO_ROOT:?} aborts instead of expanding to /bootstrap/... should the
  # variable ever be empty.
  for stack in cluster platform apps edge; do
    stack_dir="${REPO_ROOT:?}/bootstrap/${stack}"
    rm -rf "${stack_dir}"/terraform.tfstate*
    rm -f "${stack_dir}/.terraform.tfstate.lock.info"
    rm -rf "${stack_dir}/.terraform/"
  done

  echo "Destruction complete. Retained data under /var/openebs/local was left intact."
}

# Command dispatch: the first CLI argument selects the sub-command.
case "${1:-}" in
  up)
    up
    ;;
  rebuild-cluster)
    rebuild_cluster
    ;;
  apps)
    apps
    ;;
  deploy-gitea)
    deploy_gitea
    ;;
  bootstrap-gitea-repo)
    bootstrap_gitea_repo
    ;;
  backup-gitea)
    backup_gitea
    ;;
  drill-gitea-restore)
    drill_gitea_restore
    ;;
  install-gitea-runner)
    install_gitea_runner "${2:-}"
    ;;
  move-prometheus-stack-workers)
    move_prometheus_stack_workers
    ;;
  openwrt)
    openwrt
    ;;
  nuke)
    nuke
    ;;
  *)
    # Usage is a diagnostic for a bad invocation, so it goes to stderr.
    echo "Usage: $0 {up|rebuild-cluster|apps|deploy-gitea|bootstrap-gitea-repo|backup-gitea|drill-gitea-restore|install-gitea-runner|move-prometheus-stack-workers|openwrt|nuke}" >&2
    exit 1
    ;;
esac