my-homelab-configs/lab.sh

2688 lines
93 KiB
Bash
Executable File

#!/usr/bin/env bash
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail
# Absolute path of the directory that contains this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Fixed path that the EXIT trap below deletes.
# NOTE(review): predictable name under /tmp — consider mktemp if nothing else
# relies on this exact path.
BUILDX_CONFIG="/tmp/buildx-config.toml"
# Kubeconfig handed to kubectl by the helpers in this file. KUBECONFIG_PATH wins
# over TF_VAR_kubeconfig_path, which wins over the built-in default.
KUBECONFIG_PATH="${KUBECONFIG_PATH:-${TF_VAR_kubeconfig_path:-/home/jv/.kube/config}}"
# Remove the buildx config file on every exit path.
trap 'rm -f "${BUILDX_CONFIG}"' EXIT
#######################################
# Abort the script unless it is running on a Debian Linux host.
# Arguments: $1 - name of the command being guarded (used in the message)
# Outputs:   refusal message on stderr when the host does not qualify
# Exits:     1 when the kernel is not Linux or the ID field of
#            /etc/os-release is not "debian"
#######################################
require_debian_server() {
  local command_name="$1"
  local distro_id=""

  case "$(uname -s)" in
    Linux) ;;
    *)
      echo "Refusing to run '${command_name}' from this machine. Run it on the Debian homelab server." >&2
      exit 1
      ;;
  esac

  # The ID field may be quoted in os-release; awk strips the quotes.
  if [[ -r /etc/os-release ]]; then
    distro_id="$(awk -F= '$1 == "ID" {gsub(/"/, "", $2); print $2; exit}' /etc/os-release)"
  fi

  if [[ "${distro_id}" == "debian" ]]; then
    return 0
  fi
  echo "Refusing to run '${command_name}' on ${distro_id:-unknown OS}. Run it on the Debian homelab server." >&2
  exit 1
}
#######################################
# Report whether a resource address is present in a stack's OpenTofu state.
# Globals:   REPO_ROOT (read)
# Arguments: $1 - stack directory relative to REPO_ROOT
#            $2 - resource address to look up
# Returns:   exit status of `tofu state show`; all output is discarded
#######################################
tofu_state_has_resource() {
  local stack="$1"
  local resource_address="$2"
  local stack_dir="${REPO_ROOT}/${stack}"

  tofu -chdir="${stack_dir}" state show "${resource_address}" &>/dev/null
}
#######################################
# Report whether a namespace holds at least one secret labelled as belonging
# to the given Helm release (owner=helm,name=<release>).
# Globals:   KUBECONFIG_PATH (read)
# Arguments: $1 - namespace
#            $2 - Helm release name
# Returns:   0 when kubectl printed a secret name, 1 otherwise (kubectl
#            failures are swallowed and count as "not found")
#######################################
helm_release_secret_exists() {
  local namespace="$1"
  local release_name="$2"
  local selector="owner=helm,name=${release_name}"
  local first_secret=""

  first_secret="$(
    kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get secrets \
      -l "${selector}" \
      -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true
  )"

  if [[ -z "${first_secret}" ]]; then
    return 1
  fi
  return 0
}
#######################################
# Report whether a Kubernetes object exists.
# Globals:   KUBECONFIG_PATH (read)
# Arguments: $1 - namespace; empty means the lookup is done without -n
#            $2 - resource kind
#            $3 - resource name
# Returns:   exit status of `kubectl get`; all output is discarded
#######################################
kubernetes_resource_exists() {
  local namespace="$1"
  local resource_kind="$2"
  local resource_name="$3"

  if [[ -z "${namespace}" ]]; then
    kubectl --kubeconfig "${KUBECONFIG_PATH}" get "${resource_kind}" "${resource_name}" &>/dev/null
  else
    kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get "${resource_kind}" "${resource_name}" &>/dev/null
  fi
}
#######################################
# Import an already-installed Helm release into a stack's OpenTofu state.
# Does nothing when the address is already in state or when no release secret
# is found in the cluster.
# Globals:   REPO_ROOT (read)
# Arguments: $1 - stack directory relative to REPO_ROOT
#            $2 - OpenTofu resource address
#            $3 - release namespace
#            $4 - release name
# Outputs:   progress line on stdout before importing
# Returns:   0 when nothing needed importing, otherwise the status of
#            `tofu import`
#######################################
adopt_tofu_helm_release() {
  local stack="$1"
  local resource_address="$2"
  local namespace="$3"
  local release_name="$4"
  local import_id="${namespace}/${release_name}"

  # Already tracked in state: nothing to do.
  tofu_state_has_resource "${stack}" "${resource_address}" && return 0
  # No release in the cluster: nothing to adopt.
  helm_release_secret_exists "${namespace}" "${release_name}" || return 0

  echo "Importing existing Helm release ${namespace}/${release_name} into ${stack} state (${resource_address})..."
  tofu -chdir="${REPO_ROOT}/${stack}" import -input=false "${resource_address}" "${import_id}"
}
#######################################
# Import an existing Kubernetes object into a stack's OpenTofu state.
# Does nothing when the address is already in state or when the object is not
# found in the cluster.
# Globals:   REPO_ROOT (read)
# Arguments: $1 - stack directory relative to REPO_ROOT
#            $2 - OpenTofu resource address
#            $3 - namespace (empty for lookups without -n)
#            $4 - resource kind
#            $5 - resource name
#            $6 - import ID handed to `tofu import`
# Outputs:   progress line on stdout before importing
# Returns:   0 when nothing needed importing, otherwise the status of
#            `tofu import`
#######################################
adopt_tofu_kubernetes_resource() {
  local stack="$1"
  local resource_address="$2"
  local namespace="$3"
  local resource_kind="$4"
  local resource_name="$5"
  local import_id="$6"

  # Already tracked in state: nothing to do.
  tofu_state_has_resource "${stack}" "${resource_address}" && return 0
  # Object missing from the cluster: nothing to adopt.
  kubernetes_resource_exists "${namespace}" "${resource_kind}" "${resource_name}" || return 0

  echo "Importing existing Kubernetes ${resource_kind} ${resource_name} into ${stack} state (${resource_address})..."
  tofu -chdir="${REPO_ROOT}/${stack}" import -input=false "${resource_address}" "${import_id}"
}
#######################################
# Adopt the pre-existing Helm releases and cluster-scoped objects of the
# bootstrap/platform stack into its OpenTofu state, in a fixed order.
# Arguments: none
# Returns:   status of the last adopt call
#######################################
adopt_platform_existing_resources() {
  local stack="bootstrap/platform"
  local entry
  local address
  local namespace
  local release
  local kind
  local name

  # One row per Helm release: "<resource address> <namespace> <release>".
  local -a helm_releases=(
    "helm_release.calico_crds tigera-operator calico-crds"
    "helm_release.calico tigera-operator calico"
    "helm_release.openebs openebs openebs"
    "helm_release.argocd argocd argocd"
    "helm_release.kyverno kyverno kyverno"
    "helm_release.kyverno_policies kyverno kyverno-policies"
    "helm_release.loki monitoring loki"
    "helm_release.promtail monitoring promtail"
    "helm_release.prometheus_stack monitoring prometheus-stack"
  )
  # One row per cluster-scoped object: "<resource address> <kind> <name>".
  # The object name doubles as the import ID.
  local -a cluster_objects=(
    "kubernetes_storage_class_v1.openebs_hostpath_retain storageclass openebs-hostpath-retain"
    "kubernetes_namespace_v1.monitoring namespace monitoring"
  )

  for entry in "${helm_releases[@]}"; do
    IFS=' ' read -r address namespace release <<<"${entry}"
    adopt_tofu_helm_release "${stack}" "${address}" "${namespace}" "${release}"
  done

  for entry in "${cluster_objects[@]}"; do
    IFS=' ' read -r address kind name <<<"${entry}"
    adopt_tofu_kubernetes_resource "${stack}" "${address}" "" "${kind}" "${name}" "${name}"
  done
}
#######################################
# Initialise and apply one OpenTofu stack with -auto-approve.
# Globals:   REPO_ROOT (read), LAB_CLUSTER_VAR_FILE (read, optional)
# Arguments: $1 - stack directory relative to REPO_ROOT
# Notes:     bootstrap/cluster additionally receives
#            -var-file=${LAB_CLUSTER_VAR_FILE} when that variable is non-empty;
#            bootstrap/platform adopts existing resources between init and
#            apply.
# Returns:   status of `tofu apply`
#######################################
run_tofu_stack() {
  local stack="$1"
  local stack_dir="${REPO_ROOT}/${stack}"
  local -a apply_args=(-auto-approve)

  tofu -chdir="${stack_dir}" init

  case "${stack}" in
    bootstrap/cluster)
      if [[ -n "${LAB_CLUSTER_VAR_FILE:-}" ]]; then
        apply_args+=("-var-file=${LAB_CLUSTER_VAR_FILE}")
      fi
      ;;
    bootstrap/platform)
      adopt_platform_existing_resources
      ;;
  esac

  tofu -chdir="${stack_dir}" apply "${apply_args[@]}"
}
#######################################
# Case-insensitive test for an "enabled" flag value.
# Arguments: $1 - value to test
# Returns:   0 for 1, true, yes or on; 1 for anything else
#######################################
truthy() {
  local normalized="${1,,}"

  [[ "${normalized}" == "1" ||
    "${normalized}" == "true" ||
    "${normalized}" == "yes" ||
    "${normalized}" == "on" ]]
}
#######################################
# Case-insensitive test for a "disabled" flag value.
# Arguments: $1 - value to test
# Returns:   0 for 0, false, no, off or disabled; 1 for anything else
#######################################
disabled_value() {
  local normalized="${1,,}"

  [[ "${normalized}" == "0" ||
    "${normalized}" == "false" ||
    "${normalized}" == "no" ||
    "${normalized}" == "off" ||
    "${normalized}" == "disabled" ]]
}
#######################################
# Report whether a worker index appears in a skip list.
# Arguments: $1 - worker index to look for
#            $2 - comma and/or whitespace separated list of indexes
# Outputs:   error message on stderr when an entry is not all digits
# Returns:   0 when the index is listed, 1 when it is not
# Exits:     1 when an entry examined before a match is not all digits
#######################################
worker_index_is_skipped() {
  local index="$1"
  local skip_indexes="$2"
  local skip_index
  local -a candidates=()

  # Split into an array instead of relying on an unquoted expansion, so entries
  # are never subject to pathname expansion. With -d '' read consumes the whole
  # string and reports EOF as a non-zero status, hence the `|| true`.
  IFS=$' \t\n' read -r -d '' -a candidates <<<"${skip_indexes//,/ }" || true

  # The ${var+...} form keeps `set -u` quiet on an empty array in older bash.
  for skip_index in ${candidates[@]+"${candidates[@]}"}; do
    if ! [[ "${skip_index}" =~ ^[0-9]+$ ]]; then
      echo "LAB_PIMOX_SKIP_WORKER_INDEXES must contain only comma or space separated positive integers." >&2
      exit 1
    fi
    # Force base 10: a zero-padded entry such as 08 would otherwise be parsed
    # as an invalid octal literal and never match.
    if ((10#${skip_index} == index)); then
      return 0
    fi
  done
  return 1
}
#######################################
# Make sure python3 is available, installing it through apt when it is not.
# Arguments: none
# Returns:   0 when python3 is already resolvable, otherwise the status of the
#            apt-get install
#######################################
ensure_python3() {
  # Nothing to do when the interpreter is already resolvable.
  command -v python3 >/dev/null 2>&1 && return 0

  sudo apt-get update
  sudo apt-get install -y --no-install-recommends python3
}
#######################################
# Print the interface name that `ip route get` reports for a target.
# Arguments: $1 - destination address handed to `ip route get`
# Outputs:   the token following the first "dev" field on stdout; nothing when
#            ip prints no such field
#######################################
detect_route_interface() {
  local target="$1"
  # Scan the fields of each line and stop at the first "dev" token.
  local awk_program='{ for (field = 1; field <= NF; field++) if ($field == "dev") { print $(field + 1); exit } }'

  ip route get "${target}" 2>/dev/null | awk "${awk_program}"
}
#######################################
# Run a command on a remote host over non-interactive SSH.
# Arguments: $1 - host
#            $2 - login user
#            $3 - identity file
#            $4… - remote command words, passed through to ssh unchanged
# Notes:     BatchMode=yes, a 10 second connect timeout and
#            StrictHostKeyChecking=accept-new are always set.
# Returns:   exit status of ssh
#######################################
pimox_ssh() {
  local host="$1"
  local user="$2"
  local key_path="$3"
  shift 3

  local -a ssh_options=(
    -i "${key_path}"
    -o BatchMode=yes
    -o ConnectTimeout=10
    -o StrictHostKeyChecking=accept-new
  )
  ssh "${ssh_options[@]}" "${user}@${host}" "$@"
}
#######################################
# Print the first usable IPv4 address that a VM's guest agent reports.
# Globals:   LAB_PIMOX_QM_BIN (read, optional)
# Arguments: $1 - Pimox host
#            $2 - Pimox login user
#            $3 - identity file for the Pimox host
#            $4 - VM ID
#            $5 - required address prefix; empty accepts any address that is
#                 not 127.* or 169.254.*
#            $6 - optional qm path (default: LAB_PIMOX_QM_BIN or /usr/sbin/qm)
# Outputs:   the matching address on stdout
# Returns:   0 when an address was printed; 1 when the agent query produced no
#            output, the output was not a JSON list, or no address qualified
#######################################
pimox_guest_ipv4() {
  local host="$1"
  local user="$2"
  local key_path="$3"
  local vmid="$4"
  local ip_prefix="$5"
  local qm_bin="${6:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}"
  local guest_json

  # A failed or silent agent query is expected while the VM boots, so both the
  # error text and the exit status are discarded here.
  guest_json="$(pimox_ssh "${host}" "${user}" "${key_path}" "sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces" 2>/dev/null || true)"
  if [[ -z "${guest_json}" ]]; then
    return 1
  fi

  # The payload travels through the environment so that stdin stays free for
  # the program text. The heredoc body must start at column 0: it is Python.
  GUEST_JSON="${guest_json}" python3 - "${ip_prefix}" <<'PY'
import json
import os
import sys

prefix = sys.argv[1]
try:
    interfaces = json.loads(os.environ.get("GUEST_JSON", ""))
except Exception:
    sys.exit(1)

# Anything other than a list of interface objects is treated as "no address"
# instead of crashing with a traceback.
if not isinstance(interfaces, list):
    sys.exit(1)

for iface in interfaces:
    if not isinstance(iface, dict):
        continue
    for address in iface.get("ip-addresses") or []:
        if not isinstance(address, dict):
            continue
        if address.get("ip-address-type") != "ipv4":
            continue
        ip = address.get("ip-address", "")
        if not isinstance(ip, str) or not ip:
            continue
        # Skip loopback and link-local addresses.
        if ip.startswith(("127.", "169.254.")):
            continue
        if prefix and not ip.startswith(prefix):
            continue
        print(ip)
        sys.exit(0)
sys.exit(1)
PY
}
#######################################
# Dump a VM's status, selected config keys and guest-agent interface report
# to stderr. Best effort: failures of the remote call are ignored.
# Arguments: $1 - Pimox host
#            $2 - Pimox login user
#            $3 - identity file for the Pimox host
#            $4 - VM ID
#            $5 - path to qm on the Pimox host
# Outputs:   whatever the remote commands print, redirected to stderr
# Returns:   always 0
#######################################
pimox_worker_vm_debug() {
  local host="$1"
  local user="$2"
  local key_path="$3"
  local vmid="$4"
  local qm_bin="$5"
  local remote_script

  # The remote script is sent verbatim, so its lines stay at column 0.
  remote_script="set +e
echo 'Pimox VM ${vmid} status:'
sudo '${qm_bin}' status '${vmid}'
echo 'Pimox VM ${vmid} config summary:'
sudo '${qm_bin}' config '${vmid}' | grep -E '^(agent|boot|net0|scsi0|virtio0|sata0|ide0|ide2|efidisk0):' || true
echo 'Pimox VM ${vmid} guest-agent network-get-interfaces:'
sudo '${qm_bin}' guest cmd '${vmid}' network-get-interfaces"

  pimox_ssh "${host}" "${user}" "${key_path}" "${remote_script}" >&2 || true
}
#######################################
# Poll a Pimox VM until its guest agent reports an IPv4 address and an SSH
# login to that address succeeds, then print the address.
# Globals:   REPO_ROOT (read), LAB_PIMOX_QM_BIN (read, optional),
#            LAB_PIMOX_GUEST_SSH_TIMEOUT_SECONDS (read, default 600)
# Arguments: $1 - Pimox host
#            $2 - Pimox login user
#            $3 - identity file for the Pimox host
#            $4 - VM ID
#            $5 - login user for the guest SSH probe
#            $6 - identity file for the guest SSH probe
#            $7 - required address prefix (may be empty)
#            $8 - overall polling budget in seconds
#            $9 - optional qm path (default: LAB_PIMOX_QM_BIN or /usr/sbin/qm)
# Outputs:   guest address on stdout on success; progress and diagnostics on
#            stderr
# Returns:   0 once SSH succeeds; 1 when the SSH timeout setting is invalid or
#            polling ends without a successful login
#######################################
wait_for_pimox_guest_ssh() {
local host="$1"
local user="$2"
local key_path="$3"
local vmid="$4"
local guest_user="$5"
local guest_key_path="$6"
local ip_prefix="$7"
local timeout_seconds="$8"
local qm_bin="${9:-${LAB_PIMOX_QM_BIN:-/usr/sbin/qm}}"
local deadline
local elapsed
local guest_ip
local ip_filter_description
local known_hosts_file="${REPO_ROOT}/.lab/pimox-worker-known_hosts"
local last_guest_ip=""
local last_known_hosts_ip=""
local last_ssh_output=""
local next_log
local ssh_deadline=0
local ssh_output
local ssh_timeout_seconds="${LAB_PIMOX_GUEST_SSH_TIMEOUT_SECONDS:-600}"
# Wording used in the progress and failure messages below.
ip_filter_description="matching prefix ${ip_prefix}"
if [[ -z "${ip_prefix}" ]]; then
ip_filter_description="that is not loopback or link-local"
fi
if ! [[ "${ssh_timeout_seconds}" =~ ^[0-9]+$ ]] || ((ssh_timeout_seconds == 0)); then
echo "LAB_PIMOX_GUEST_SSH_TIMEOUT_SECONDS must be a positive integer." >&2
return 1
fi
# The SSH probe below uses this dedicated known_hosts file (UserKnownHostsFile).
mkdir -p "$(dirname "${known_hosts_file}")"
touch "${known_hosts_file}"
chmod 0600 "${known_hosts_file}"
deadline=$((SECONDS + timeout_seconds))
next_log="${SECONDS}"
while ((SECONDS < deadline)); do
guest_ip="$(pimox_guest_ipv4 "${host}" "${user}" "${key_path}" "${vmid}" "${ip_prefix}" "${qm_bin}" || true)"
if [[ -n "${guest_ip}" ]]; then
# The SSH-specific deadline starts the first time an address is reported and
# is only checked on iterations that see an address.
if ((ssh_deadline == 0)); then
ssh_deadline=$((SECONDS + ssh_timeout_seconds))
elif ((SECONDS >= ssh_deadline)); then
break
fi
last_guest_ip="${guest_ip}"
# Remove any stored host key for this address once per distinct address, so
# accept-new can record the key the guest presents now.
if [[ "${last_known_hosts_ip}" != "${guest_ip}" ]]; then
ssh-keygen -R "${guest_ip}" -f "${known_hosts_file}" >/dev/null 2>&1 || true
last_known_hosts_ip="${guest_ip}"
fi
if ssh_output="$(ssh -i "${guest_key_path}" -o BatchMode=yes -o ConnectTimeout=8 -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile="${known_hosts_file}" "${guest_user}@${guest_ip}" true 2>&1)"; then
printf '%s\n' "${guest_ip}"
return 0
fi
last_ssh_output="${ssh_output}"
fi
# Progress is logged at most once every 60 seconds.
if ((SECONDS >= next_log)); then
elapsed=$((timeout_seconds - (deadline - SECONDS)))
if [[ -n "${last_guest_ip}" ]]; then
echo "Waiting for SSH to worker VM ${vmid} at ${last_guest_ip} as ${guest_user} (${elapsed}s elapsed)..." >&2
if [[ -n "${last_ssh_output}" ]]; then
echo "Last SSH failure: ${last_ssh_output}" >&2
fi
else
echo "Waiting for worker VM ${vmid} to report an IPv4 address ${ip_filter_description} through qemu-guest-agent (${elapsed}s elapsed)..." >&2
fi
next_log=$((SECONDS + 60))
fi
sleep 10
done
# Reached only when a deadline passed: explain which stage never completed,
# then dump VM diagnostics.
if [[ -n "${last_guest_ip}" ]]; then
echo "Worker VM ${vmid} reported guest IP ${last_guest_ip}, but SSH as ${guest_user} never became reachable." >&2
if [[ -n "${last_ssh_output}" ]]; then
echo "Last SSH failure: ${last_ssh_output}" >&2
fi
else
echo "Worker VM ${vmid} did not report an IPv4 address ${ip_filter_description} through qemu-guest-agent." >&2
fi
pimox_worker_vm_debug "${host}" "${user}" "${key_path}" "${vmid}" "${qm_bin}"
return 1
}
#######################################
# Derive a MAC address from a VM ID: the fixed prefix 02:68:10 followed by the
# low 24 bits of the ID as three hex octets.
# Arguments: $1 - VM ID
# Outputs:   the MAC address on stdout
#######################################
pimox_generated_mac() {
  local vmid="$1"
  local high=$(((vmid >> 16) & 0xff))
  local middle=$(((vmid >> 8) & 0xff))
  local low=$((vmid & 0xff))

  printf '02:68:10:%02x:%02x:%02x\n' "${high}" "${middle}" "${low}"
}
#######################################
# Count the CPUs named by a cpuset list such as "0,2,4-7".
# Arguments: $1 - comma separated CPU IDs and/or inclusive ranges
# Outputs:   the number of listed CPUs on stdout (entries are not
#            de-duplicated)
# Returns:   0 on success; 1 when an entry is neither a number nor a range, or
#            a range ends before it starts
#######################################
cpuset_cpu_count() {
  local cpuset="$1"
  local count=0
  local part
  local start
  local end
  local -a parts=()

  IFS=',' read -r -a parts <<<"${cpuset}"
  # The ${var+...} form keeps `set -u` quiet on an empty array in older bash.
  for part in ${parts[@]+"${parts[@]}"}; do
    if [[ "${part}" =~ ^([0-9]+)-([0-9]+)$ ]]; then
      # Force base 10 so zero-padded IDs such as 08 are not parsed as invalid
      # octal literals.
      start=$((10#${BASH_REMATCH[1]}))
      end=$((10#${BASH_REMATCH[2]}))
      if ((end < start)); then
        return 1
      fi
      count=$((count + end - start + 1))
    elif [[ "${part}" =~ ^[0-9]+$ ]]; then
      count=$((count + 1))
    else
      return 1
    fi
  done
  printf '%s\n' "${count}"
}
#######################################
# Select and validate the CPU affinity entry for one worker.
# Arguments: $1 - 1-based worker index
#            $2 - whitespace separated affinity entries, one per worker
#            $3 - number of cores the worker is configured with
# Outputs:   the selected entry on stdout; error text on stderr
# Returns:   0 when the entry exists and names exactly $3 CPUs
# Exits:     1 when the entry is malformed, names a different number of CPUs,
#            or no entry exists for the index
#######################################
pimox_worker_cpu_affinity() {
  local index="$1"
  local affinities="$2"
  local worker_cores="$3"
  local affinity
  local cpu_count
  local position=0

  # Word splitting is intentional: entries are whitespace separated.
  for affinity in ${affinities}; do
    position=$((position + 1))
    ((position == index)) || continue

    if ! cpu_count="$(cpuset_cpu_count "${affinity}")"; then
      echo "Invalid Pimox worker CPU affinity '${affinity}'. Use CPU IDs or ranges, such as 4-5." >&2
      exit 1
    fi
    if ((cpu_count != worker_cores)); then
      echo "Pimox worker index ${index} uses ${worker_cores} cores but affinity '${affinity}' contains ${cpu_count} CPUs." >&2
      exit 1
    fi
    printf '%s\n' "${affinity}"
    return 0
  done

  echo "No LAB_PIMOX_WORKER_CPU_AFFINITIES entry exists for Pimox worker index ${index}." >&2
  exit 1
}
#######################################
# Make sure one Pimox worker VM exists, is configured and reachable over SSH,
# then append its connection details to the worker spec file.
# Arguments:
#   $1  index                   - 1-based worker index
#   $2  spec_file               - TSV file that receives one line per worker
#   $3  pimox_host              - Pimox host
#   $4  pimox_user              - Pimox login user
#   $5  pimox_key               - identity file for the Pimox host
#   $6  template_vmid           - VM ID of the template to clone from
#   $7  bridge                  - bridge attached to net0 of a new clone
#   $8  worker_base_vmid        - VM ID of worker index 1
#   $9  worker_name_prefix      - VM name prefix
#   $10 worker_node_prefix      - node name prefix written to the spec file
#   $11 worker_key_prefix       - key prefix written to the spec file
#   $12 worker_cores            - cores per worker
#   $13 worker_memory           - value passed to `qm set --memory`
#   $14 worker_user             - login user inside the guest
#   $15 worker_key_path         - identity file for the guest
#   $16 ip_prefix               - required guest address prefix
#   $17 timeout_seconds         - budget handed to wait_for_pimox_guest_ssh
#   $18 qm_bin                  - path to qm on the Pimox host
#   $19 worker_storage          - storage used for the full clone
#   $20 worker_replace_existing - truthy: destroy and re-clone an existing VM
#   $21 worker_cpu_affinity     - optional value for `qm set --affinity`
# Outputs: progress on stdout, errors on stderr; appends
#          "<key>\t<ip>\t<user>\t<node name>\t<key path>" to spec_file
# Exits:   1 when the VM ID belongs to a template or the guest never becomes
#          reachable
# NOTE(review): this function declares no `set -e` itself; whether a failing
# remote step aborts the run depends on the caller's shell options.
#######################################
ensure_pimox_worker_node() {
local index="$1"
local spec_file="$2"
local pimox_host="$3"
local pimox_user="$4"
local pimox_key="$5"
local template_vmid="$6"
local bridge="$7"
local worker_base_vmid="$8"
local worker_name_prefix="$9"
local worker_node_prefix="${10}"
local worker_key_prefix="${11}"
local worker_cores="${12}"
local worker_memory="${13}"
local worker_user="${14}"
local worker_key_path="${15}"
local ip_prefix="${16}"
local timeout_seconds="${17}"
local qm_bin="${18}"
local worker_storage="${19}"
local worker_replace_existing="${20}"
local worker_cpu_affinity="${21}"
local padded
local vmid
local worker_key
local worker_name
local node_name
local mac
local guest_ip
# Derive the per-worker identifiers: zero-padded index, sequential VM ID, and
# a MAC address computed from the VM ID.
printf -v padded '%02d' "${index}"
vmid=$((worker_base_vmid + index - 1))
worker_key="${worker_key_prefix}${padded}"
worker_name="${worker_name_prefix}-${padded}"
node_name="${worker_node_prefix}-${padded}"
mac="$(pimox_generated_mac "${vmid}")"
# Existing VM: refuse templates, then either destroy it (replace mode) or
# reconfigure it in place and start it if it is stopped.
if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' status '${vmid}' >/dev/null 2>&1"; then
if pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${vmid}' | grep -q '^template: 1$'"; then
echo "VM ${vmid} exists as a template; refusing to reuse it as worker ${worker_name}." >&2
exit 1
fi
if truthy "${worker_replace_existing}"; then
echo "Replacing existing Pimox worker VM ${vmid} (${worker_name}) before cloning from template ${template_vmid}..."
# Remote script: stop the VM, wait up to 300 seconds for it to report
# "stopped", then destroy it (with --purge first, plain destroy as fallback).
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu
sudo '${qm_bin}' stop '${vmid}' >/dev/null 2>&1 || true
elapsed=0
while [ \"\$elapsed\" -lt 300 ]; do
if sudo '${qm_bin}' status '${vmid}' | grep -q 'status: stopped'; then
break
fi
sleep 5
elapsed=\$((elapsed + 5))
done
sudo '${qm_bin}' destroy '${vmid}' --purge 1 >/dev/null 2>&1 || sudo '${qm_bin}' destroy '${vmid}'"
else
# Remote script: re-apply agent/CPU/memory settings, apply the affinity when
# one is given (an "Unknown option: affinity" error is tolerated), and start
# the VM if it is stopped.
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu
sudo '${qm_bin}' set '${vmid}' --agent enabled=1 --sockets 1 --cores '${worker_cores}' --memory '${worker_memory}'
if [ -n '${worker_cpu_affinity}' ]; then
affinity_output=\"\$(sudo '${qm_bin}' set '${vmid}' --affinity '${worker_cpu_affinity}' 2>&1)\" || {
case \"\$affinity_output\" in
*'Unknown option: affinity'*)
echo 'Pimox qm does not support --affinity; skipping CPU affinity ${worker_cpu_affinity} for VM ${vmid}.'
;;
*)
printf '%s\n' \"\$affinity_output\" >&2
exit 1
;;
esac
}
fi
if sudo '${qm_bin}' status '${vmid}' | grep -q 'status: stopped'; then sudo '${qm_bin}' start '${vmid}'; fi"
fi
fi
# VM absent (never created, or just destroyed above): validate the bridge and
# the target storage, then full-clone the template, configure and start it.
if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' status '${vmid}' >/dev/null 2>&1"; then
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu
if ! ip link show '${bridge}' >/dev/null 2>&1; then
echo 'Pimox bridge ${bridge} does not exist. Refusing to change Orange Pi networking.' >&2
exit 1
fi
pvesm_cmd=\"\$(command -v pvesm 2>/dev/null || true)\"
if [ -z \"\$pvesm_cmd\" ] && [ -x /usr/sbin/pvesm ]; then
pvesm_cmd=/usr/sbin/pvesm
fi
if [ -z \"\$pvesm_cmd\" ]; then
echo 'pvesm was not found; cannot validate Pimox worker storage ${worker_storage}' >&2
exit 1
fi
if ! sudo \"\$pvesm_cmd\" status | awk -v storage='${worker_storage}' 'NR > 1 && \$1 == storage { found = 1 } END { exit found ? 0 : 1 }'; then
echo 'Pimox worker storage ${worker_storage} was not found. Refusing to create worker ${worker_name}.' >&2
exit 1
fi
sudo '${qm_bin}' clone '${template_vmid}' '${vmid}' --name '${worker_name}' --full 1 --storage '${worker_storage}'
sudo '${qm_bin}' set '${vmid}' --agent enabled=1
sudo '${qm_bin}' set '${vmid}' --sockets 1 --cores '${worker_cores}' --memory '${worker_memory}'
if [ -n '${worker_cpu_affinity}' ]; then
affinity_output=\"\$(sudo '${qm_bin}' set '${vmid}' --affinity '${worker_cpu_affinity}' 2>&1)\" || {
case \"\$affinity_output\" in
*'Unknown option: affinity'*)
echo 'Pimox qm does not support --affinity; skipping CPU affinity ${worker_cpu_affinity} for VM ${vmid}.'
;;
*)
printf '%s\n' \"\$affinity_output\" >&2
exit 1
;;
esac
}
fi
sudo '${qm_bin}' set '${vmid}' --net0 'virtio=${mac},bridge=${bridge}'
sudo '${qm_bin}' set '${vmid}' --boot 'order=scsi0;net0'
sudo '${qm_bin}' set '${vmid}' --onboot 1
sudo '${qm_bin}' start '${vmid}'"
fi
# Block until the guest reports an address and accepts SSH; the address is
# what gets recorded for the worker.
if ! guest_ip="$(wait_for_pimox_guest_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "${vmid}" "${worker_user}" "${worker_key_path}" "${ip_prefix}" "${timeout_seconds}" "${qm_bin}")"; then
echo "Timed out waiting for worker VM ${vmid} (${worker_name}) to report a reachable guest IP." >&2
exit 1
fi
# One tab-separated record per worker; write_cluster_worker_var_file reads
# these five fields back.
printf '%s\t%s\t%s\t%s\t%s\n' "${worker_key}" "${guest_ip}" "${worker_user}" "${node_name}" "${worker_key_path}" >>"${spec_file}"
}
#######################################
# Render the worker spec TSV into a JSON var file with two maps,
# "worker_nodes" and "worker_node_labels".
# Globals (all read, all optional):
#   LAB_INCLUDE_RASPBERRY_WORKER        default "false"; any value other than
#                                       0/false/no/off/disabled adds the
#                                       "raspberrypi" node
#   LAB_RASPBERRY_HOST, LAB_RASPBERRY_USER, LAB_RASPBERRY_NODE_NAME,
#   LAB_RASPBERRY_SSH_KEY_PATH          connection details for that node
#   LAB_RASPBERRY_NODE_LABELS_JSON      JSON object of labels for that node
#   LAB_PIMOX_WORKER_NODE_LABELS_JSON   JSON object of labels for every
#                                       spec-file worker
# Arguments: $1 - spec file: key, host, user, node name and key path per line,
#                 tab separated; blank lines are ignored
#            $2 - output JSON file (overwritten)
# Returns:   exit status of the embedded Python program; invalid label JSON
#            makes it exit non-zero with a message
#######################################
write_cluster_worker_var_file() {
  local spec_file="$1"
  local var_file="$2"
  # The JSON defaults live in their own variables. Written inline as
  # ${VAR:-{...}}, the expansion closes at the first "}" and a stray "}" is
  # appended whenever VAR is set, which corrupts user-supplied JSON.
  local default_raspberry_labels='{"homelab.dev/node-role":"edge-app","homelab.dev/storage":"local"}'
  local default_pimox_labels='{"homelab.dev/node-role":"app","homelab.dev/storage":"nvme"}'

  # The heredoc body must start at column 0: it is Python.
  LAB_INCLUDE_RASPBERRY_WORKER="${LAB_INCLUDE_RASPBERRY_WORKER:-false}" \
    LAB_RASPBERRY_HOST="${LAB_RASPBERRY_HOST:-192.168.100.89}" \
    LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \
    LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \
    LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \
    LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-${default_raspberry_labels}}" \
    LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-${default_pimox_labels}}" \
    python3 - "${spec_file}" "${var_file}" <<'PY'
import json
import os
import sys

spec_file, var_file = sys.argv[1:3]
nodes = {}
node_labels = {}

try:
    raspberry_labels = json.loads(os.environ["LAB_RASPBERRY_NODE_LABELS_JSON"])
    pimox_labels = json.loads(os.environ["LAB_PIMOX_WORKER_NODE_LABELS_JSON"])
except json.JSONDecodeError as exc:
    raise SystemExit(f"Invalid node label JSON: {exc}") from exc

# The Raspberry Pi worker is included unless the flag is an explicit
# "disabled" spelling.
if os.environ["LAB_INCLUDE_RASPBERRY_WORKER"].lower() not in {"0", "false", "no", "off", "disabled"}:
    nodes["raspberrypi"] = {
        "host": os.environ["LAB_RASPBERRY_HOST"],
        "user": os.environ["LAB_RASPBERRY_USER"],
        "node_name": os.environ["LAB_RASPBERRY_NODE_NAME"],
        "ssh_key_path": os.environ["LAB_RASPBERRY_SSH_KEY_PATH"],
    }
    node_labels["raspberrypi"] = raspberry_labels

with open(spec_file, encoding="utf-8") as handle:
    for line in handle:
        line = line.rstrip("\n")
        if not line:
            continue
        key, host, user, node_name, ssh_key_path = line.split("\t")
        nodes[key] = {
            "host": host,
            "user": user,
            "node_name": node_name,
            "ssh_key_path": ssh_key_path,
        }
        node_labels[key] = pimox_labels

with open(var_file, "w", encoding="utf-8") as handle:
    json.dump({"worker_nodes": nodes, "worker_node_labels": node_labels}, handle, indent=2)
    handle.write("\n")
PY
}
#######################################
# Drive the Pimox worker pipeline: validate settings, probe the Pimox host,
# apply the bootstrap/provisioning stack, make sure each worker VM exists, and
# write the var file that the cluster stack consumes.
# Globals read: REPO_ROOT plus the LAB_PIMOX_* / TF_VAR_pimox_* settings listed
#               in the local declarations below (each with a built-in default),
#               LAB_PROVISIONING_INTERFACE, TF_VAR_provisioning_interface
# Globals written (exported): TF_VAR_provisioning_interface, TF_VAR_pimox_*,
#               LAB_CLUSTER_VAR_FILE
# Arguments: none
# Returns:   0 when the pipeline is disabled, when mode is "auto" and the host
#            is not ready, when the worker count is 0 (after provisioning), or
#            after all workers are recorded
# Exits:     1 on invalid settings, an unready host in non-auto mode, an
#            undetectable provisioning interface, or a missing template
#######################################
run_pimox_pipeline() {
# Settings: LAB_* overrides win over TF_VAR_* values, which win over defaults.
local mode="${LAB_PIMOX_PIPELINE:-false}"
local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}"
local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}"
local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}"
local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}"
local bridge="${LAB_PIMOX_BRIDGE:-${TF_VAR_pimox_template_bridge:-vmbr0}}"
local template_vmid="${LAB_PIMOX_TEMPLATE_VMID:-${TF_VAR_pimox_template_vmid:-9000}}"
local template_name="${LAB_PIMOX_TEMPLATE_NAME:-${TF_VAR_pimox_template_name:-debian13-arm64-k8s-template}}"
local template_cores="${LAB_PIMOX_TEMPLATE_CORES:-${TF_VAR_pimox_template_cores:-2}}"
local template_memory="${LAB_PIMOX_TEMPLATE_MEMORY:-${TF_VAR_pimox_template_memory:-4096}}"
local template_cpu_affinity="${LAB_PIMOX_TEMPLATE_CPU_AFFINITY:-${TF_VAR_pimox_template_cpu_affinity:-}}"
local template_replace_existing="${LAB_PIMOX_TEMPLATE_REPLACE_EXISTING:-${TF_VAR_pimox_template_replace_existing:-false}}"
local provisioning_interface
local worker_count="${LAB_PIMOX_WORKER_COUNT:-2}"
local worker_base_vmid="${LAB_PIMOX_WORKER_BASE_VMID:-9010}"
local worker_name_prefix="${LAB_PIMOX_WORKER_NAME_PREFIX:-pimox-worker}"
local worker_node_prefix="${LAB_PIMOX_WORKER_NODE_PREFIX:-pimox-worker}"
local worker_key_prefix="${LAB_PIMOX_WORKER_KEY_PREFIX:-pimox}"
local worker_skip_indexes="${LAB_PIMOX_SKIP_WORKER_INDEXES:-}"
local worker_cores="${LAB_PIMOX_WORKER_CORES:-2}"
local worker_memory="${LAB_PIMOX_WORKER_MEMORY:-4096}"
local worker_cpu_affinities="${LAB_PIMOX_WORKER_CPU_AFFINITIES:-}"
local worker_replace_existing="${LAB_PIMOX_WORKER_REPLACE_EXISTING:-false}"
local worker_storage="${LAB_PIMOX_WORKER_STORAGE:-${TF_VAR_pimox_worker_storage:-nvme_thin_pool}}"
local worker_user="${LAB_PIMOX_WORKER_USER:-jv}"
local worker_key_path="${LAB_PIMOX_WORKER_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}"
local ip_prefix="${LAB_PIMOX_GUEST_IP_PREFIX:-192.168.100.}"
local timeout_seconds="${LAB_PIMOX_GUEST_TIMEOUT_SECONDS:-3600}"
local spec_file="${REPO_ROOT}/.lab/pimox-workers.tsv"
local var_file="${REPO_ROOT}/.lab/cluster-workers.auto.tfvars.json"
local index
local readiness_output
local readiness_status
local template_cpu_count
local worker_cpu_affinity
# Pipeline switched off (the default): nothing to do.
if disabled_value "${mode}"; then
return 0
fi
# An explicitly set worker count turns "auto" into a hard requirement, so an
# unready host becomes an error instead of a silent skip below.
if [[ "${mode}" == "auto" && -n "${LAB_PIMOX_WORKER_COUNT+x}" ]]; then
mode="true"
fi
# --- Input validation -------------------------------------------------------
if ! [[ "${worker_count}" =~ ^[0-9]+$ ]]; then
echo "LAB_PIMOX_WORKER_COUNT must be a non-negative integer." >&2
exit 1
fi
if ! [[ "${template_cores}" =~ ^[0-9]+$ && "${worker_cores}" =~ ^[0-9]+$ ]]; then
echo "LAB_PIMOX_TEMPLATE_CORES and LAB_PIMOX_WORKER_CORES must be positive integers." >&2
exit 1
fi
if ! [[ "${template_memory}" =~ ^[0-9]+$ && "${worker_memory}" =~ ^[0-9]+$ ]]; then
echo "LAB_PIMOX_TEMPLATE_MEMORY and LAB_PIMOX_WORKER_MEMORY must be positive integer MiB values." >&2
exit 1
fi
if ((template_cores == 0 || worker_cores == 0 || template_memory == 0 || worker_memory == 0)); then
echo "Pimox template and worker CPU and memory values must be greater than zero." >&2
exit 1
fi
# A template affinity, when given, must name exactly as many CPUs as the
# template has cores.
if [[ -n "${template_cpu_affinity}" ]]; then
if ! template_cpu_count="$(cpuset_cpu_count "${template_cpu_affinity}")"; then
echo "Invalid Pimox template CPU affinity '${template_cpu_affinity}'. Use CPU IDs or ranges, such as 4-5." >&2
exit 1
fi
if ((template_cpu_count != template_cores)); then
echo "Pimox template uses ${template_cores} cores but affinity '${template_cpu_affinity}' contains ${template_cpu_count} CPUs." >&2
exit 1
fi
fi
if ! truthy "${worker_replace_existing}" && ! disabled_value "${worker_replace_existing}"; then
echo "LAB_PIMOX_WORKER_REPLACE_EXISTING must be true or false." >&2
exit 1
fi
# The storage name is interpolated into remote shell commands later, so it is
# restricted to a conservative character set.
if ! [[ "${worker_storage}" =~ ^[A-Za-z0-9_.:-]+$ ]]; then
echo "LAB_PIMOX_WORKER_STORAGE must be a valid Pimox storage identifier." >&2
exit 1
fi
if [[ "${worker_storage}" == "local" ]]; then
echo "LAB_PIMOX_WORKER_STORAGE cannot be local; only the Pimox template VM should live on local storage." >&2
exit 1
fi
# --- Readiness probe --------------------------------------------------------
# errexit is switched off so the probe's exit status can be captured instead
# of aborting the script.
set +e
readiness_output="$(pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "set -eu
if ! { command -v qm >/dev/null 2>&1 || [ -x '${qm_bin}' ]; }; then
echo 'qm was not found in PATH and ${qm_bin} is not executable'
exit 1
fi
if ! ip link show '${bridge}' >/dev/null 2>&1; then
echo 'bridge ${bridge} was not found'
exit 1
fi
if ! sudo -n true >/dev/null 2>&1; then
echo 'passwordless sudo is not available for ${pimox_user}'
exit 1
fi" 2>&1)"
readiness_status=$?
set -e
# In "auto" mode an unready host is skipped; otherwise it is fatal.
if ((readiness_status != 0)); then
if [[ "${mode}" == "auto" ]]; then
echo "Skipping Pimox automation because ${pimox_user}@${pimox_host} with bridge ${bridge} is not ready."
return 0
fi
echo "Pimox automation requested, but ${pimox_user}@${pimox_host} is not ready: ${readiness_output}" >&2
exit 1
fi
ensure_python3
# Interface precedence: TF_VAR_provisioning_interface, then
# LAB_PROVISIONING_INTERFACE, then the interface on the route to the host.
provisioning_interface="${TF_VAR_provisioning_interface:-${LAB_PROVISIONING_INTERFACE:-$(detect_route_interface "${pimox_host}")}}"
if [[ -z "${provisioning_interface}" ]]; then
echo "Could not detect the Debian interface used to reach ${pimox_host}; set LAB_PROVISIONING_INTERFACE." >&2
exit 1
fi
# --- Provisioning stack -----------------------------------------------------
# Resolved settings are exported as TF_VAR_* before the stack is applied;
# the last five keep any value that is already set.
export TF_VAR_provisioning_interface="${provisioning_interface}"
export TF_VAR_pimox_host="${pimox_host}"
export TF_VAR_pimox_user="${pimox_user}"
export TF_VAR_pimox_ssh_key_path="${pimox_key}"
export TF_VAR_pimox_qm_bin="${qm_bin}"
export TF_VAR_pimox_template_bridge="${bridge}"
export TF_VAR_pimox_template_vmid="${template_vmid}"
export TF_VAR_pimox_template_name="${template_name}"
export TF_VAR_pimox_template_cores="${template_cores}"
export TF_VAR_pimox_template_memory="${template_memory}"
export TF_VAR_pimox_template_cpu_affinity="${template_cpu_affinity}"
export TF_VAR_pimox_template_replace_existing="${template_replace_existing}"
export TF_VAR_pimox_template_builder_enabled="${TF_VAR_pimox_template_builder_enabled:-true}"
export TF_VAR_pimox_template_build_ssh_key_path="${TF_VAR_pimox_template_build_ssh_key_path:-${worker_key_path}}"
export TF_VAR_pimox_template_build_user="${TF_VAR_pimox_template_build_user:-${worker_user}}"
export TF_VAR_pimox_template_guest_ip_prefix="${TF_VAR_pimox_template_guest_ip_prefix:-${ip_prefix}}"
export TF_VAR_pimox_template_build_timeout_seconds="${TF_VAR_pimox_template_build_timeout_seconds:-${timeout_seconds}}"
echo "Preparing Pimox provisioning and Debian worker template on ${pimox_host} without changing Orange Pi host networking..."
run_tofu_stack "bootstrap/provisioning"
# With zero workers requested, provisioning is the only step.
if ((worker_count == 0)); then
return 0
fi
# --- Worker VMs -------------------------------------------------------------
# The template must exist and be flagged as a template before cloning.
if ! pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' config '${template_vmid}' | grep -q '^template: 1$'"; then
echo "Template VM ${template_vmid} is not available as a Pimox template after provisioning." >&2
exit 1
fi
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "sudo '${qm_bin}' set '${template_vmid}' --agent enabled=1"
echo "Worker VM clones will be created on Pimox storage ${worker_storage}; template VM ${template_vmid} stays on its configured template storage."
# Start from an empty spec file; each worker appends one line to it.
mkdir -p "${REPO_ROOT}/.lab"
: >"${spec_file}"
for ((index = 1; index <= worker_count; index++)); do
if worker_index_is_skipped "${index}" "${worker_skip_indexes}"; then
echo "Skipping Pimox worker index ${index} because LAB_PIMOX_SKIP_WORKER_INDEXES=${worker_skip_indexes}."
continue
fi
# The affinity is looked up per worker only when a list was configured.
worker_cpu_affinity=""
if [[ -n "${worker_cpu_affinities}" ]]; then
worker_cpu_affinity="$(pimox_worker_cpu_affinity "${index}" "${worker_cpu_affinities}" "${worker_cores}")"
fi
ensure_pimox_worker_node \
"${index}" \
"${spec_file}" \
"${pimox_host}" \
"${pimox_user}" \
"${pimox_key}" \
"${template_vmid}" \
"${bridge}" \
"${worker_base_vmid}" \
"${worker_name_prefix}" \
"${worker_node_prefix}" \
"${worker_key_prefix}" \
"${worker_cores}" \
"${worker_memory}" \
"${worker_user}" \
"${worker_key_path}" \
"${ip_prefix}" \
"${timeout_seconds}" \
"${qm_bin}" \
"${worker_storage}" \
"${worker_replace_existing}" \
"${worker_cpu_affinity}"
done
# Render the collected workers into the var file; run_tofu_stack passes
# LAB_CLUSTER_VAR_FILE to the bootstrap/cluster stack as -var-file.
write_cluster_worker_var_file "${spec_file}" "${var_file}"
export LAB_CLUSTER_VAR_FILE="${var_file}"
}
run_openwrt_pipeline() {
local mode="${LAB_OPENWRT_VM:-${LAB_OPENWRT_PIPELINE:-false}}"
local pimox_host="${LAB_PIMOX_HOST:-${TF_VAR_pimox_host:-192.168.100.80}}"
local pimox_user="${LAB_PIMOX_USER:-${TF_VAR_pimox_user:-jv}}"
local pimox_key="${LAB_PIMOX_SSH_KEY_PATH:-${TF_VAR_pimox_ssh_key_path:-/home/jv/.ssh/id_ed25519}}"
local qm_bin="${LAB_PIMOX_QM_BIN:-${TF_VAR_pimox_qm_bin:-/usr/sbin/qm}}"
local vmid="${LAB_OPENWRT_VMID:-9050}"
local vm_name="${LAB_OPENWRT_NAME:-openwrt-firewall}"
local storage="${LAB_OPENWRT_STORAGE:-nvme_thin_pool}"
local wan_bridge="${LAB_OPENWRT_WAN_BRIDGE:-vmbr0}"
local lan_bridge="${LAB_OPENWRT_LAN_BRIDGE:-vmbr1}"
local cores="${LAB_OPENWRT_CORES:-2}"
local memory="${LAB_OPENWRT_MEMORY:-512}"
local version="${LAB_OPENWRT_VERSION:-24.10.6}"
local image_url="${LAB_OPENWRT_IMAGE_URL:-}"
local lan_ip="${LAB_OPENWRT_LAN_IP:-192.168.50.1}"
local lan_netmask="${LAB_OPENWRT_LAN_NETMASK:-255.255.255.0}"
local lan_dhcp_enabled="${LAB_OPENWRT_LAN_DHCP_ENABLED:-false}"
local start_vm="${LAB_OPENWRT_START:-true}"
local root_key_path="${LAB_OPENWRT_ROOT_SSH_PUBLIC_KEY_PATH:-${pimox_key}.pub}"
local root_key_b64=""
local lan_dhcp_ignore="1"
local start_vm_flag="false"
if disabled_value "${mode}"; then
return 0
fi
if ! truthy "${mode}"; then
echo "LAB_OPENWRT_VM must be true or false." >&2
exit 1
fi
if [[ -z "${image_url}" ]]; then
image_url="https://downloads.openwrt.org/releases/${version}/targets/armsr/armv8/openwrt-${version}-armsr-armv8-generic-ext4-combined-efi.img.gz"
fi
if ! [[ "${vmid}" =~ ^[0-9]+$ ]]; then
echo "LAB_OPENWRT_VMID must be a numeric Pimox VMID." >&2
exit 1
fi
for value_name in storage wan_bridge lan_bridge vm_name; do
local value="${!value_name}"
if ! [[ "${value}" =~ ^[A-Za-z0-9_.:-]+$ ]]; then
echo "LAB_OPENWRT_${value_name^^} contains unsupported characters." >&2
exit 1
fi
done
if [[ "${storage}" == "local" ]]; then
echo "LAB_OPENWRT_STORAGE cannot be local; reserve local storage for the Pimox Debian template." >&2
exit 1
fi
if ! [[ "${lan_ip}" =~ ^[0-9.]+$ && "${lan_netmask}" =~ ^[0-9.]+$ ]]; then
echo "LAB_OPENWRT_LAN_IP and LAB_OPENWRT_LAN_NETMASK must be IPv4-style values." >&2
exit 1
fi
if truthy "${lan_dhcp_enabled}"; then
lan_dhcp_ignore="0"
fi
if truthy "${start_vm}"; then
start_vm_flag="true"
fi
if [[ -r "${root_key_path}" ]]; then
root_key_b64="$(base64 <"${root_key_path}" | tr -d '\n')"
fi
echo "Preparing OpenWrt firewall VM ${vmid} on ${pimox_host}; validating ${wan_bridge}, ${lan_bridge}, and ${storage} without changing Orange Pi networking..."
pimox_ssh "${pimox_host}" "${pimox_user}" "${pimox_key}" "bash -s" <<EOF
set -euo pipefail
vmid="${vmid}"
vm_name="${vm_name}"
storage="${storage}"
wan_bridge="${wan_bridge}"
lan_bridge="${lan_bridge}"
cores="${cores}"
memory="${memory}"
image_url="${image_url}"
lan_ip="${lan_ip}"
lan_netmask="${lan_netmask}"
lan_dhcp_ignore="${lan_dhcp_ignore}"
start_vm="${start_vm_flag}"
root_key_b64="${root_key_b64}"
qm_cmd="${qm_bin}"
if [ ! -x "\$qm_cmd" ]; then
qm_cmd="\$(command -v qm 2>/dev/null || true)"
fi
if [ -z "\$qm_cmd" ]; then
echo "qm is not installed on this Pimox host" >&2
exit 1
fi
pvesm_cmd="\$(command -v pvesm 2>/dev/null || true)"
if [ -z "\$pvesm_cmd" ] && [ -x /usr/sbin/pvesm ]; then
pvesm_cmd=/usr/sbin/pvesm
fi
if [ -z "\$pvesm_cmd" ]; then
echo "pvesm was not found; cannot validate Pimox storage \$storage" >&2
exit 1
fi
if ! sudo -n true >/dev/null 2>&1; then
echo "passwordless sudo is required for OpenWrt VM automation" >&2
exit 1
fi
if ! ip link show "\$wan_bridge" >/dev/null 2>&1; then
echo "WAN bridge \$wan_bridge does not exist. Refusing to change Orange Pi networking." >&2
exit 1
fi
if ! ip link show "\$lan_bridge" >/dev/null 2>&1; then
echo "LAN bridge \$lan_bridge does not exist. Create it manually before enabling OpenWrt automation." >&2
exit 1
fi
if ! sudo "\$pvesm_cmd" status | awk -v storage="\$storage" 'NR > 1 && \$1 == storage { found = 1 } END { exit found ? 0 : 1 }'; then
echo "Pimox storage \$storage was not found." >&2
exit 1
fi
if sudo "\$qm_cmd" status "\$vmid" >/dev/null 2>&1; then
if sudo "\$qm_cmd" config "\$vmid" | grep -q '^template: 1$'; then
echo "VM \$vmid exists as a template; refusing to reuse it for OpenWrt." >&2
exit 1
fi
sudo "\$qm_cmd" set "\$vmid" \\
--net0 "virtio,bridge=\$wan_bridge" \\
--net1 "virtio,bridge=\$lan_bridge" \\
--cores "\$cores" \\
--memory "\$memory" \\
--onboot 1
if [ "\$start_vm" = "true" ] && sudo "\$qm_cmd" status "\$vmid" | grep -q 'status: stopped'; then
sudo "\$qm_cmd" start "\$vmid"
fi
exit 0
fi
for required_cmd in curl gzip losetup mount umount awk sed; do
if ! command -v "\$required_cmd" >/dev/null 2>&1; then
echo "\$required_cmd is required on the Pimox host for OpenWrt image preparation" >&2
exit 1
fi
done
tmp_dir="\$(mktemp -d /tmp/homelab-openwrt.XXXXXX)"
mnt_dir="\$tmp_dir/root"
loopdev=""
cleanup() {
if mountpoint -q "\$mnt_dir" 2>/dev/null; then
sudo umount "\$mnt_dir" || sudo umount -l "\$mnt_dir" || true
fi
if [ -n "\$loopdev" ]; then
sudo losetup -d "\$loopdev" >/dev/null 2>&1 || true
fi
rm -rf "\$tmp_dir"
}
trap cleanup EXIT
mkdir -p "\$mnt_dir"
curl -fsSL "\$image_url" -o "\$tmp_dir/openwrt.img.gz"
gzip -dc "\$tmp_dir/openwrt.img.gz" >"\$tmp_dir/openwrt.img"
loopdev="\$(sudo losetup --find --partscan --show "\$tmp_dir/openwrt.img")"
root_part="\${loopdev}p2"
if [ ! -b "\$root_part" ] && echo "\$loopdev" | grep -q 'loop[0-9]\$'; then
root_part="\${loopdev}p2"
fi
if [ ! -b "\$root_part" ]; then
echo "Could not find OpenWrt root partition \$root_part after attaching image." >&2
exit 1
fi
sudo mount "\$root_part" "\$mnt_dir"
sudo mkdir -p "\$mnt_dir/etc/config" "\$mnt_dir/etc/dropbear" "\$mnt_dir/root/.ssh"
cat >"\$tmp_dir/network" <<NETWORK
config interface 'loopback'
option device 'lo'
option proto 'static'
option ipaddr '127.0.0.1'
option netmask '255.0.0.0'
config globals 'globals'
option ula_prefix 'fd00:68:50::/48'
config interface 'wan'
option device 'eth0'
option proto 'dhcp'
config interface 'lan'
option device 'eth1'
option proto 'static'
option ipaddr '\$lan_ip'
option netmask '\$lan_netmask'
option ip6assign '60'
NETWORK
cat >"\$tmp_dir/dhcp" <<DHCP
config dnsmasq
option domainneeded '1'
option boguspriv '1'
option filterwin2k '0'
option localise_queries '1'
option rebind_protection '1'
option rebind_localhost '1'
option local '/lan/'
option domain 'lan'
option expandhosts '1'
option cachesize '1000'
option authoritative '1'
option readethers '1'
option leasefile '/tmp/dhcp.leases'
option resolvfile '/tmp/resolv.conf.d/resolv.conf.auto'
config dhcp 'lan'
option interface 'lan'
option start '100'
option limit '150'
option leasetime '12h'
option ignore '\$lan_dhcp_ignore'
config dhcp 'wan'
option interface 'wan'
option ignore '1'
DHCP
cat >"\$tmp_dir/firewall" <<'FIREWALL'
config defaults
option input 'REJECT'
option output 'ACCEPT'
option forward 'REJECT'
option synflood_protect '1'
config zone
option name 'lan'
list network 'lan'
option input 'ACCEPT'
option output 'ACCEPT'
option forward 'ACCEPT'
config zone
option name 'wan'
list network 'wan'
option input 'REJECT'
option output 'ACCEPT'
option forward 'REJECT'
option masq '1'
option mtu_fix '1'
config forwarding
option src 'lan'
option dest 'wan'
config rule
option name 'Allow-DHCP-Renew'
option src 'wan'
option proto 'udp'
option dest_port '68'
option target 'ACCEPT'
option family 'ipv4'
config rule
option name 'Allow-Ping'
option src 'wan'
option proto 'icmp'
option icmp_type 'echo-request'
option family 'ipv4'
option target 'ACCEPT'
FIREWALL
cat >"\$tmp_dir/system" <<SYSTEM
config system
option hostname '\$vm_name'
option timezone 'UTC'
option ttylogin '0'
option log_size '64'
option urandom_seed '0'
SYSTEM
sudo cp "\$tmp_dir/network" "\$mnt_dir/etc/config/network"
sudo cp "\$tmp_dir/dhcp" "\$mnt_dir/etc/config/dhcp"
sudo cp "\$tmp_dir/firewall" "\$mnt_dir/etc/config/firewall"
sudo cp "\$tmp_dir/system" "\$mnt_dir/etc/config/system"
if [ -n "\$root_key_b64" ]; then
printf '%s' "\$root_key_b64" | base64 -d >"\$tmp_dir/authorized_keys"
sudo cp "\$tmp_dir/authorized_keys" "\$mnt_dir/etc/dropbear/authorized_keys"
sudo cp "\$tmp_dir/authorized_keys" "\$mnt_dir/root/.ssh/authorized_keys"
sudo chmod 0600 "\$mnt_dir/etc/dropbear/authorized_keys" "\$mnt_dir/root/.ssh/authorized_keys"
fi
sync
sudo umount "\$mnt_dir"
sudo losetup -d "\$loopdev"
loopdev=""
sudo "\$qm_cmd" create "\$vmid" \\
--name "\$vm_name" \\
--bios ovmf \\
--cores "\$cores" \\
--memory "\$memory" \\
--net0 "virtio,bridge=\$wan_bridge" \\
--net1 "virtio,bridge=\$lan_bridge" \\
--numa 0 \\
--ostype l26 \\
--scsihw virtio-scsi-pci \\
--sockets 1 \\
--vga virtio \\
--onboot 1
sudo "\$qm_cmd" set "\$vmid" --efidisk0 "\$storage:1,efitype=4m,pre-enrolled-keys=0"
sudo "\$qm_cmd" importdisk "\$vmid" "\$tmp_dir/openwrt.img" "\$storage" --format raw >/dev/null
disk_volume="\$(sudo "\$qm_cmd" config "\$vmid" | awk -F': ' '/^unused[0-9]+:/ { print \$2; exit }')"
if [ -z "\$disk_volume" ]; then
echo "Could not find imported OpenWrt disk volume for VM \$vmid" >&2
exit 1
fi
sudo "\$qm_cmd" set "\$vmid" --scsi0 "\$disk_volume"
sudo "\$qm_cmd" set "\$vmid" --boot "order=scsi0"
if [ "\$start_vm" = "true" ]; then
sudo "\$qm_cmd" start "\$vmid"
fi
EOF
}
# Best-effort removal of leftover Calico/CNI network state on this node.
# Deletes every interface whose name starts with "cali", a fixed list of
# well-known virtual interfaces, and every network namespace whose name
# starts with "cni-" or "calico". All failures are ignored.
cleanup_calico_links() {
  local link_name

  # "ip link show" prints "<idx>: <name>@<peer>: ..."; keep the name field and
  # drop the "@<peer>" suffix before passing each name to "ip link delete".
  ip link show \
    | awk -F: '/^[0-9]+: cali/ {print $2}' \
    | cut -d@ -f1 \
    | xargs -r -n1 sudo ip link delete 2>/dev/null || true

  for link_name in vxlan.calico tunl0 cni0 kube-ipvs0; do
    sudo ip link delete "${link_name}" 2>/dev/null || true
  done

  ip netns list \
    | awk '/^(cni-|calico)/ {print $1}' \
    | xargs -r -n1 sudo ip netns delete 2>/dev/null || true
}
# Flush all rules and delete all user-defined chains in the filter, nat,
# mangle and raw iptables tables, then clear the IPVS table when ipvsadm is
# installed. Every step is best-effort: failures are ignored.
cleanup_iptables() {
  local table

  # The default (filter) table is addressed without -t.
  sudo iptables -F || true
  sudo iptables -X || true

  for table in nat mangle raw; do
    sudo iptables -t "${table}" -F || true
    sudo iptables -t "${table}" -X || true
  done

  if command -v ipvsadm >/dev/null 2>&1; then
    sudo ipvsadm --clear || true
  fi
}
# Remove Calico runtime files below /run/calico and /var/run/calico.
# Anything whose path matches '*/cgroup*' is pruned from the walk, and the
# top-level directory itself is only removed (via rmdir) when it ends up
# empty. All failures are ignored.
cleanup_calico_runtime_files() {
  local path
  for path in /run/calico /var/run/calico; do
    if sudo test -e "${path}"; then
      # -mindepth is a global find option, so it is given before the tests;
      # placing it after -path only triggers a GNU find warning and does not
      # change which entries are visited.
      sudo find "${path}" -mindepth 1 -path '*/cgroup*' -prune -o -exec rm -rf -- {} + 2>/dev/null || true
      sudo rmdir "${path}" 2>/dev/null || true
    fi
  done
}
# Undo the node DNS changes: remove the homelab systemd-resolved drop-in,
# put back /etc/resolv.conf from its homelab backup when that backup exists,
# and restart systemd-resolved (restart failures are ignored).
restore_node_dns() {
  local resolv_conf=/etc/resolv.conf
  local resolv_backup=/etc/resolv.conf.homelab-k8s-backup

  sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf

  if sudo test -e "${resolv_backup}"; then
    sudo rm -f "${resolv_conf}"
    sudo mv "${resolv_backup}" "${resolv_conf}"
  fi

  sudo systemctl restart systemd-resolved 2>/dev/null || true
}
# Best-effort unmount of everything mounted below the kubelet, containerd
# and Calico runtime directories. Each target is first force-unmounted and,
# if that fails, lazily unmounted; all failures are ignored.
cleanup_mounts() {
  local mount_root
  # Loop variable kept local so it does not leak into the caller's scope.
  local mount_target

  if command -v findmnt >/dev/null 2>&1; then
    # Reverse-sorted so nested mount points are unmounted before their parents.
    while IFS= read -r mount_target; do
      sudo umount -f "${mount_target}" 2>/dev/null || sudo umount -l "${mount_target}" 2>/dev/null || true
    done < <(
      for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do
        findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true
      done | sort -ru
    )
  fi

  # Fallback sweep: try every directory 2-5 levels below the kubelet pods
  # directory, whether or not it is a mount point.
  while IFS= read -r mount_target; do
    sudo umount -f "${mount_target}" 2>/dev/null || sudo umount -l "${mount_target}" 2>/dev/null || true
  done < <(find /var/lib/kubelet/pods -mindepth 2 -maxdepth 5 -type d 2>/dev/null || true)

  # NOTE(review): this glob is expanded by the invoking user, not by root.
  sudo umount -f /var/lib/containerd/srun/* 2>/dev/null || sudo umount -l /var/lib/containerd/srun/* 2>/dev/null || true
}
# Reset this node to a pre-Kubernetes state: kubeadm reset, stop kubelet and
# containerd, unmount leftovers, delete Kubernetes/CNI/Calico/containerd
# state, flush iptables, remove Calico links, restore DNS, then start
# containerd again with an empty certs.d directory.
cleanup_node() {
  sudo kubeadm reset --force || true
  sudo systemctl stop kubelet 2>/dev/null || true
  sudo systemctl stop containerd 2>/dev/null || true
  sudo killall containerd-shim-runc-v2 2>/dev/null || true
  cleanup_mounts

  sudo rm -rf \
    /etc/kubernetes/ \
    /var/lib/etcd/ \
    /var/lib/kubelet/ \
    /var/lib/cni/ \
    /etc/cni/net.d \
    /run/flannel \
    /var/lib/calico \
    /var/log/calico \
    /etc/containerd/certs.d \
    /etc/containerd/config.toml

  # The containerd globs must be expanded by root: written directly on the
  # sudo command line they are expanded by the invoking user, and if that
  # user cannot list the directories the literal pattern is passed to rm and
  # nothing is deleted.
  sudo sh -c 'rm -rf -- /var/lib/containerd/* /run/containerd/*'

  cleanup_calico_runtime_files
  sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam
  cleanup_iptables
  cleanup_calico_links
  restore_node_dns

  sudo mkdir -p /etc/containerd/certs.d
  sudo systemctl reset-failed kubelet containerd 2>/dev/null || true
  sudo systemctl start containerd 2>/dev/null || true
}
# Print the registry endpoint (everything before the first "/") of the
# php-website image referenced in apps/website/web-app.yaml.
# Globals: REPO_ROOT (read)
# Exits with status 1 when no such image line is found or the image
# reference contains no "/".
website_registry_endpoint() {
  local manifest="${REPO_ROOT}/apps/website/web-app.yaml"
  local image_ref

  image_ref="$(awk '$1 == "image:" && $2 ~ /php-website/ {print $2; exit}' "${manifest}")"
  case "${image_ref}" in
    */*)
      printf '%s\n' "${image_ref%%/*}"
      ;;
    *)
      echo "Could not determine website registry endpoint from apps/website/web-app.yaml" >&2
      exit 1
      ;;
  esac
}
# Print the registry endpoint (everything before the first "/") of the
# demos-static image referenced in apps/demos-static/web-app.yaml.
# Globals: REPO_ROOT (read)
# Exits with status 1 when no such image line is found or the image
# reference contains no "/".
demos_registry_endpoint() {
  local manifest="${REPO_ROOT}/apps/demos-static/web-app.yaml"
  local image_ref

  image_ref="$(awk '$1 == "image:" && $2 ~ /demos-static/ {print $2; exit}' "${manifest}")"
  case "${image_ref}" in
    */*)
      printf '%s\n' "${image_ref%%/*}"
      ;;
    *)
      echo "Could not determine demos registry endpoint from apps/demos-static/web-app.yaml" >&2
      exit 1
      ;;
  esac
}
# Print one SHA-256 digest summarising every regular file below apps/website:
# the per-file sha256sum listing (in sorted path order) is itself hashed.
# Runs in a subshell so the caller's working directory is untouched.
# Globals: REPO_ROOT (read)
website_source_hash() {
  (
    cd "${REPO_ROOT}"
    find apps/website -type f -print0 \
      | sort -z \
      | xargs -0 sha256sum \
      | sha256sum \
      | cut -d' ' -f1
  )
}
# Print one SHA-256 digest summarising every regular file below
# apps/demos-static: the per-file sha256sum listing (in sorted path order) is
# itself hashed. Runs in a subshell so the caller's working directory is
# untouched.
# Globals: REPO_ROOT (read)
demos_source_hash() {
  (
    cd "${REPO_ROOT}"
    find apps/demos-static -type f -print0 \
      | sort -z \
      | xargs -0 sha256sum \
      | sha256sum \
      | cut -d' ' -f1
  )
}
# Check over plain HTTP whether <repository>:<tag> has a manifest in the
# registry at <registry_endpoint>.
# Arguments: $1 - registry host[:port], $2 - repository, $3 - tag
# Returns:   curl's exit status (0 when the manifest request succeeds);
#            1 when curl is not installed.
registry_image_exists() {
  local registry_endpoint="$1"
  local repository="$2"
  local tag="$3"
  local accept_header
  local -a media_types=(
    "application/vnd.oci.image.index.v1+json"
    "application/vnd.oci.image.manifest.v1+json"
    "application/vnd.docker.distribution.manifest.list.v2+json"
    "application/vnd.docker.distribution.manifest.v2+json"
  )

  command -v curl >/dev/null 2>&1 || return 1

  # Join the accepted manifest media types with ", ".
  accept_header="$(printf '%s, ' "${media_types[@]}")"
  accept_header="${accept_header%, }"

  curl -fsS \
    -H "Accept: ${accept_header}" \
    "http://${registry_endpoint}/v2/${repository}/manifests/${tag}" >/dev/null
}
# Print the value stored for <key> in a "key=value" state file.
# Only the first matching line is used; everything after the first "=" is
# printed, so values may themselves contain "=".
# Prints nothing (and still returns 0) when the file or key is missing.
image_state_value() {
  local state_file="$1"
  local key="$2"

  awk -v key="${key}" '
    BEGIN { FS = "=" }
    $1 == key {
      print substr($0, index($0, "=") + 1)
      exit
    }
  ' "${state_file}" 2>/dev/null || true
}
# Succeed when the recorded build state matches the requested website build
# and the php-website:latest manifest is present in the registry.
# Arguments: $1 - state file, $2 - expected source hash, $3 - expected
#            platforms, $4 - expected image reference, $5 - registry endpoint
# Returns:   1 when the state file is missing or any recorded value differs;
#            otherwise the status of registry_image_exists.
website_image_is_current() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"
  local registry_endpoint="$5"
  local recorded_hash
  local recorded_platforms
  local recorded_image

  if [[ ! -f "${state_file}" ]]; then
    return 1
  fi

  recorded_hash="$(image_state_value "${state_file}" source_hash)"
  recorded_platforms="$(image_state_value "${state_file}" platforms)"
  recorded_image="$(image_state_value "${state_file}" image)"

  if [[ "${recorded_hash}" != "${source_hash}" ]]; then
    return 1
  fi
  if [[ "${recorded_platforms}" != "${platforms}" ]]; then
    return 1
  fi
  if [[ "${recorded_image}" != "${image_ref}" ]]; then
    return 1
  fi

  registry_image_exists "${registry_endpoint}" php-website latest
}
# Succeed when the recorded build state matches the requested demos build
# and the demos-static:latest manifest is present in the registry.
# Arguments: $1 - state file, $2 - expected source hash, $3 - expected
#            platforms, $4 - expected image reference, $5 - registry endpoint
# Returns:   1 when the state file is missing or any recorded value differs;
#            otherwise the status of registry_image_exists.
demos_image_is_current() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"
  local registry_endpoint="$5"
  local recorded_hash
  local recorded_platforms
  local recorded_image

  if [[ ! -f "${state_file}" ]]; then
    return 1
  fi

  recorded_hash="$(image_state_value "${state_file}" source_hash)"
  recorded_platforms="$(image_state_value "${state_file}" platforms)"
  recorded_image="$(image_state_value "${state_file}" image)"

  if [[ "${recorded_hash}" != "${source_hash}" ]]; then
    return 1
  fi
  if [[ "${recorded_platforms}" != "${platforms}" ]]; then
    return 1
  fi
  if [[ "${recorded_image}" != "${image_ref}" ]]; then
    return 1
  fi

  registry_image_exists "${registry_endpoint}" demos-static latest
}
# Record the website build state as three "key=value" lines (source_hash,
# platforms, image), creating the state file's directory if needed and
# overwriting any previous state.
# Arguments: $1 - state file, $2 - source hash, $3 - platforms, $4 - image ref
write_website_image_state() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"
  local state_dir

  state_dir="$(dirname "${state_file}")"
  mkdir -p "${state_dir}"
  printf 'source_hash=%s\nplatforms=%s\nimage=%s\n' \
    "${source_hash}" "${platforms}" "${image_ref}" > "${state_file}"
}
# Record the demos build state as three "key=value" lines (source_hash,
# platforms, image), creating the state file's directory if needed and
# overwriting any previous state.
# Arguments: $1 - state file, $2 - source hash, $3 - platforms, $4 - image ref
write_demos_image_state() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"
  local state_dir

  state_dir="$(dirname "${state_file}")"
  mkdir -p "${state_dir}"
  printf 'source_hash=%s\nplatforms=%s\nimage=%s\n' \
    "${source_hash}" "${platforms}" "${image_ref}" > "${state_file}"
}
# Print the available space, in MiB as reported by "df -Pm", of the
# filesystem holding <path>. When <path> does not exist yet, the nearest
# existing ancestor (ultimately "/") is measured instead.
path_available_mb() {
  local probe="$1"

  until [[ -e "${probe}" || "${probe}" == "/" ]]; do
    probe="$(dirname "${probe}")"
  done

  # Row 2 of the POSIX df output is the filesystem line; column 4 is "Available".
  df -Pm "${probe}" | awk 'NR == 2 {print $4}'
}
# Print Docker's data root directory as reported by "docker info".
# Falls back to /var/lib/docker when docker fails or reports nothing.
# The output is captured first so that partial output from a failing
# "docker info" (for example an empty line when the daemon is unreachable)
# is never combined with the fallback path.
docker_root_dir() {
  local root_dir

  root_dir="$(docker info --format '{{.DockerRootDir}}' 2>/dev/null)" || root_dir=""
  printf '%s\n' "${root_dir:-/var/lib/docker}"
}
# Best-effort cleanup of Docker build data: remove the lab-builder buildx
# builder and its buildkit container, then prune all builder cache and all
# unused Docker data. Errors from any step are silenced and ignored.
prune_unused_docker_build_data() {
  # Builder instance and the container backing it.
  docker buildx rm lab-builder 2>/dev/null || :
  docker rm -f buildx_buildkit_lab-builder0 2>/dev/null || :

  # Build cache first, then everything else that is unused.
  docker builder prune -af 2>/dev/null || :
  docker system prune -af 2>/dev/null || :
}
# Make sure Docker's data root has enough free space for an image build.
# If free space is below the threshold, unused Docker build data is pruned
# and the check is repeated; the script exits with status 1 when space is
# still insufficient.
# Globals: DOCKER_BUILD_MIN_FREE_MB (read) - threshold in MiB, default 4096
# Exits 1 when the threshold is not a whole number.
ensure_docker_build_space() {
  local docker_root
  local free_mb
  local min_free_mb="${DOCKER_BUILD_MIN_FREE_MB:-4096}"

  # Validate before arithmetic: a non-numeric value would otherwise be
  # evaluated as an expression inside (( )).
  if ! [[ "${min_free_mb}" =~ ^[0-9]+$ ]]; then
    echo "DOCKER_BUILD_MIN_FREE_MB must be a whole number of MiB (got '${min_free_mb}')." >&2
    exit 1
  fi
  # Force base 10 so a leading zero is not read as octal.
  min_free_mb=$((10#${min_free_mb}))

  docker_root="$(docker_root_dir)"
  free_mb="$(path_available_mb "${docker_root}")"
  # Without a usable reading, do not prune on a guess; warn and carry on.
  if ! [[ "${free_mb}" =~ ^[0-9]+$ ]]; then
    echo "Could not determine free space for Docker data root ${docker_root}; skipping the build space check." >&2
    return 0
  fi
  if ((10#${free_mb} >= min_free_mb)); then
    return 0
  fi

  echo "Docker data root ${docker_root} has ${free_mb}MiB free; pruning unused Docker build data..."
  prune_unused_docker_build_data

  free_mb="$(path_available_mb "${docker_root}")"
  if ! [[ "${free_mb}" =~ ^[0-9]+$ ]]; then
    echo "Could not determine free space for Docker data root ${docker_root} after cleanup; skipping the build space check." >&2
    return 0
  fi
  if ((10#${free_mb} < min_free_mb)); then
    echo "Docker data root ${docker_root} still has only ${free_mb}MiB free after cleanup." >&2
    echo "Free space there or move Docker's data-root to a larger filesystem such as /home before building." >&2
    echo "Override the threshold with DOCKER_BUILD_MIN_FREE_MB if this host can build with less space." >&2
    exit 1
  fi
}
# Prepare a fresh "lab-builder" buildx builder for multi-arch builds.
# Registers QEMU user-mode emulation, writes a buildx config that marks the
# given registry plus 127.0.0.1:30500 and localhost:30500 as plain-HTTP /
# insecure, then recreates and bootstraps the builder.
# Globals:   BUILDX_CONFIG (read) - path of the config file to write
# Arguments: $1 - registry endpoint (host[:port])
prepare_buildx_builder() {
  local registry_endpoint="$1"
  local endpoint

  docker run --rm --privileged multiarch/qemu-user-static --reset -p yes

  for endpoint in "${registry_endpoint}" "127.0.0.1:30500" "localhost:30500"; do
    printf '[registry."%s"]\nhttp = true\ninsecure = true\n' "${endpoint}"
  done > "${BUILDX_CONFIG}"

  # Drop any previous builder so the new config is always picked up.
  docker buildx rm lab-builder 2>/dev/null || true
  docker buildx create \
    --name lab-builder \
    --driver docker-container \
    --driver-opt network=host \
    --config "${BUILDX_CONFIG}" \
    --use
  docker buildx inspect --bootstrap
}
# Print diagnostics for an Argo CD application: its manifest and description,
# the pods in the argocd namespace, and the last 120 log lines of the repo
# server and the application controller. Every step is best-effort.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - Argo CD application name
dump_argocd_debug() {
  local app="$1"
  local -a argocd_kubectl=(kubectl --kubeconfig "${KUBECONFIG}" -n argocd)

  "${argocd_kubectl[@]}" get application "${app}" -o yaml || true
  "${argocd_kubectl[@]}" describe application "${app}" || true
  "${argocd_kubectl[@]}" get pods -o wide || true
  "${argocd_kubectl[@]}" logs deployment/argocd-repo-server --tail=120 || true
  "${argocd_kubectl[@]}" logs statefulset/argocd-application-controller --tail=120 || true
}
# Print diagnostics for a namespace: all resources, PVCs, pod descriptions
# and the last 80 lines of the events list sorted by lastTimestamp.
# Every step is best-effort.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - namespace
dump_namespace_debug() {
  local namespace="$1"
  local -a ns_kubectl=(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}")

  "${ns_kubectl[@]}" get all -o wide || true
  "${ns_kubectl[@]}" get pvc -o wide || true
  "${ns_kubectl[@]}" describe pods || true
  "${ns_kubectl[@]}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true
}
# Poll every 5 seconds until the namespace exists.
# On timeout, prints an error, dumps Argo CD diagnostics for the owning
# application and exits the script with status 1.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - namespace, $2 - Argo CD app name, $3 - timeout in seconds
wait_for_namespace() {
  local namespace="$1"
  local app="$2"
  local timeout_seconds="$3"
  local waited

  for ((waited = 0; ; waited += 5)); do
    if kubectl --kubeconfig "${KUBECONFIG}" get namespace "${namespace}" >/dev/null 2>&1; then
      return 0
    fi
    if ((waited >= timeout_seconds)); then
      echo "Timed out waiting for namespace ${namespace} from Argo CD app ${app}" >&2
      dump_argocd_debug "${app}"
      exit 1
    fi
    sleep 5
  done
}
# Poll every 5 seconds until <kind>/<name> exists in the namespace.
# On timeout, prints an error, dumps Argo CD diagnostics and the last 80
# lines of namespace events, then exits the script with status 1.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - namespace, $2 - resource kind, $3 - resource name,
#            $4 - Argo CD app name, $5 - timeout in seconds
wait_for_namespaced_resource() {
  local namespace="$1"
  local kind="$2"
  local name="$3"
  local app="$4"
  local timeout_seconds="$5"
  local waited
  local -a ns_kubectl=(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}")

  for ((waited = 0; ; waited += 5)); do
    if "${ns_kubectl[@]}" get "${kind}/${name}" >/dev/null 2>&1; then
      return 0
    fi
    if ((waited >= timeout_seconds)); then
      echo "Timed out waiting for ${kind}/${name} in namespace ${namespace} from Argo CD app ${app}" >&2
      dump_argocd_debug "${app}"
      "${ns_kubectl[@]}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true
      exit 1
    fi
    sleep 5
  done
}
# Poll every 5 seconds until the deployment reports at least as many ready
# replicas as its spec requests. The desired count is read once up front and
# defaults to 1 when it cannot be read.
# On timeout, prints an error, dumps Argo CD and namespace diagnostics and
# exits the script with status 1.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - namespace, $2 - deployment name, $3 - Argo CD app name,
#            $4 - timeout in seconds
wait_for_deployment_ready() {
  local namespace="$1"
  local deployment="$2"
  local app="$3"
  local timeout_seconds="$4"
  local desired_replicas
  local ready_replicas
  local waited
  local -a get_deployment=(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}")

  desired_replicas="$("${get_deployment[@]}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
  desired_replicas="${desired_replicas:-1}"

  for ((waited = 0; ; waited += 5)); do
    # A failing lookup simply counts as "0 ready" for this round.
    ready_replicas="$("${get_deployment[@]}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null)" || true
    if (( ${ready_replicas:-0} >= desired_replicas )); then
      return 0
    fi
    if ((waited >= timeout_seconds)); then
      echo "Timed out waiting for deployment/${deployment} in namespace ${namespace} to have ${desired_replicas} ready replicas" >&2
      dump_argocd_debug "${app}"
      dump_namespace_debug "${namespace}"
      exit 1
    fi
    sleep 5
  done
}
# Deploy the external Gitea instance on the remote host over SSH.
# Copies infra/gitea/docker-compose.yml to the host, then runs a remote
# script that installs missing packages and Docker, repairs Docker's NAT
# chain if needed, writes the compose .env file and starts the stack.
# The backup timer is installed afterwards, and also when deployment itself
# is disabled.
# Globals: REPO_ROOT (read); LAB_GITEA_* / LAB_RASPBERRY_* (read, optional
#          overrides for host, user, key, install dir, image, ports, domain,
#          root URL and container name)
# Exits 1 when the compose file is missing or empty.
deploy_gitea() {
  local mode="${LAB_GITEA_DEPLOY:-true}"
  local gitea_host="${LAB_GITEA_HOST:-${LAB_RASPBERRY_HOST:-192.168.100.89}}"
  local gitea_user="${LAB_GITEA_USER:-${LAB_RASPBERRY_USER:-jv}}"
  local gitea_key="${LAB_GITEA_SSH_KEY_PATH:-${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}}"
  local install_dir="${LAB_GITEA_INSTALL_DIR:-/opt/homelab-gitea}"
  local image="${LAB_GITEA_IMAGE:-gitea/gitea:1.21.7}"
  local http_port="${LAB_GITEA_HTTP_PORT:-3000}"
  local ssh_port="${LAB_GITEA_SSH_PORT:-32222}"
  local domain="${LAB_GITEA_DOMAIN:-lab2025.duckdns.org}"
  local root_url="${LAB_GITEA_ROOT_URL:-https://lab2025.duckdns.org/git/}"
  local container_name="${LAB_GITEA_CONTAINER_NAME:-homelab-gitea}"
  local compose_file="${REPO_ROOT}/infra/gitea/docker-compose.yml"
  local remote
  local remote_script
  local -a ssh_opts

  require_debian_server "deploy-gitea"

  if disabled_value "${mode}"; then
    install_gitea_backup_timer
    return 0
  fi

  if [[ ! -s "${compose_file}" ]]; then
    echo "Missing ${compose_file}" >&2
    exit 1
  fi

  remote="${gitea_user}@${gitea_host}"
  ssh_opts=(-i "${gitea_key}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new)

  # Script executed on the Gitea host. Unescaped ${...} are filled in here;
  # escaped \$... are evaluated remotely.
  remote_script="set -eu
install_dir='${install_dir}'
install_missing_packages() {
missing_packages=''
for package in \"\$@\"; do
if ! dpkg-query -W -f='\${Status}' \"\$package\" 2>/dev/null | grep -q 'install ok installed'; then
missing_packages=\"\$missing_packages \$package\"
fi
done
if [ -n \"\$missing_packages\" ]; then
sudo apt-get update
sudo apt-get install -y --no-install-recommends \$missing_packages
fi
}
install_missing_packages ca-certificates curl iptables
if ! command -v docker >/dev/null 2>&1; then
curl -fsSL https://get.docker.com | sudo sh
fi
if ! sudo docker compose version >/dev/null 2>&1; then
install_missing_packages docker-compose-plugin
fi
repair_docker_iptables() {
if sudo iptables -t nat -S DOCKER >/dev/null 2>&1; then
return 0
fi
echo 'Docker NAT chain is missing on the Gitea host; restarting Docker once to restore iptables state...'
sudo systemctl restart docker
sleep 3
if sudo iptables -t nat -S DOCKER >/dev/null 2>&1; then
return 0
fi
echo 'Docker NAT chain is still missing after restarting Docker.' >&2
sudo iptables -t nat -S >&2 || true
sudo systemctl status docker --no-pager -l >&2 || true
exit 1
}
repair_docker_iptables
sudo mkdir -p \"\$install_dir/data\"
sudo cp /tmp/homelab-gitea/docker-compose.yml \"\$install_dir/docker-compose.yml\"
sudo chown -R 1000:1000 \"\$install_dir/data\"
sudo tee \"\$install_dir/.env\" >/dev/null <<ENV_EOT
GITEA_IMAGE=${image}
GITEA_CONTAINER_NAME=${container_name}
GITEA_HTTP_PORT=${http_port}
GITEA_SSH_PORT=${ssh_port}
GITEA_DOMAIN=${domain}
GITEA_ROOT_URL=${root_url}
GITEA_UID=1000
GITEA_GID=1000
ENV_EOT
cd \"\$install_dir\"
sudo docker compose pull
sudo docker compose up -d --remove-orphans
sudo docker compose ps
"

  echo "Deploying external Gitea on ${remote}:${http_port}..."
  ssh "${ssh_opts[@]}" "${remote}" "rm -rf /tmp/homelab-gitea && mkdir -p /tmp/homelab-gitea"
  scp "${ssh_opts[@]}" "${compose_file}" "${remote}:/tmp/homelab-gitea/docker-compose.yml"
  ssh "${ssh_opts[@]}" "${remote}" "${remote_script}"

  install_gitea_backup_timer
}
# Print a random 64-character hexadecimal password (32 random bytes).
# Uses openssl when it is installed, otherwise Python's secrets module.
gitea_bootstrap_password() {
  if ! command -v openssl >/dev/null 2>&1; then
    python3 -c 'import secrets; print(secrets.token_hex(32))'
    return $?
  fi

  openssl rand -hex 32
  return 0
}
# Print the base URL of the Gitea REST API.
# LAB_GITEA_API_BASE_URL, when set, wins (one trailing "/" is stripped).
# Otherwise the API is probed on the given host and port, first at /api/v1
# and then at /git/api/v1, and the first base whose /version answers is used.
# Arguments: $1 - Gitea host, $2 - HTTP port
# Exits 1 when neither candidate responds.
gitea_api_base_url() {
  local gitea_host="$1"
  local http_port="$2"
  local api_base_override="${LAB_GITEA_API_BASE_URL:-}"
  local origin="http://${gitea_host}:${http_port}"
  local api_path

  if [[ -n "${api_base_override}" ]]; then
    printf '%s\n' "${api_base_override%/}"
    return 0
  fi

  for api_path in /api/v1 /git/api/v1; do
    if curl -fsS "${origin}${api_path}/version" >/dev/null 2>&1; then
      printf '%s\n' "${origin}${api_path}"
      return 0
    fi
  done

  echo "Could not reach the Gitea API on ${gitea_host}:${http_port}." >&2
  exit 1
}
# Check through the Gitea API whether repository <owner>/<repo_name> exists.
# Arguments: $1 - API base URL, $2 - user, $3 - password, $4 - owner,
#            $5 - repository name
# Returns:   0 on HTTP 200, 1 on HTTP 404.
# Exits 1 with a message on 401/403 (authentication) or any other status.
gitea_repo_exists() {
  local api_base="$1"
  local auth_user="$2"
  local auth_password="$3"
  local owner="$4"
  local repo_name="$5"
  local http_code

  http_code="$(curl -sS -o /dev/null -w '%{http_code}' -u "${auth_user}:${auth_password}" "${api_base}/repos/${owner}/${repo_name}")"

  if [[ "${http_code}" == "200" ]]; then
    return 0
  fi
  if [[ "${http_code}" == "404" ]]; then
    return 1
  fi

  if [[ "${http_code}" == "401" || "${http_code}" == "403" ]]; then
    echo "Gitea API authentication failed for ${auth_user} while checking ${owner}/${repo_name}." >&2
  else
    echo "Unexpected Gitea API response ${http_code} while checking ${owner}/${repo_name}." >&2
  fi
  exit 1
}
# Check through the Gitea API whether <branch> exists in <owner>/<repo_name>.
# Arguments: $1 - API base URL, $2 - user, $3 - password, $4 - owner,
#            $5 - repository name, $6 - branch
# Returns:   0 on HTTP 200, 1 on HTTP 404.
# Exits 1 with a message on 401/403 (authentication) or any other status.
gitea_branch_exists() {
  local api_base="$1"
  local auth_user="$2"
  local auth_password="$3"
  local owner="$4"
  local repo_name="$5"
  local branch="$6"
  local http_code

  http_code="$(curl -sS -o /dev/null -w '%{http_code}' -u "${auth_user}:${auth_password}" "${api_base}/repos/${owner}/${repo_name}/branches/${branch}")"

  if [[ "${http_code}" == "200" ]]; then
    return 0
  fi
  if [[ "${http_code}" == "404" ]]; then
    return 1
  fi

  if [[ "${http_code}" == "401" || "${http_code}" == "403" ]]; then
    echo "Gitea API authentication failed for ${auth_user} while checking ${owner}/${repo_name}:${branch}." >&2
  else
    echo "Unexpected Gitea API response ${http_code} while checking ${owner}/${repo_name}:${branch}." >&2
  fi
  exit 1
}
# Create a public, uninitialised repository for the authenticated user by
# POSTing to <api_base>/user/repos. The JSON body is produced by Python so
# the repository name and branch are correctly escaped.
# Arguments: $1 - API base URL, $2 - user, $3 - password,
#            $4 - repository name, $5 - default branch
# Returns:   curl's exit status (non-zero on HTTP errors because of -f).
create_gitea_repo() {
  local api_base="$1"
  local auth_user="$2"
  local auth_password="$3"
  local repo_name="$4"
  local default_branch="$5"
  local payload

  payload="$(
    python3 - "${repo_name}" "${default_branch}" <<'PY'
import json
import sys

name = sys.argv[1]
branch = sys.argv[2]
body = {
    "name": name,
    "private": False,
    "auto_init": False,
    "default_branch": branch,
    "description": "Homelab infrastructure configuration",
}
print(json.dumps(body))
PY
  )"

  curl -fsS \
    -u "${auth_user}:${auth_password}" \
    -H "Content-Type: application/json" \
    -X POST \
    -d "${payload}" \
    "${api_base}/user/repos" >/dev/null
}
# Succeed when the public key in <public_key_path> is already known to
# Gitea, either as one of the authenticated user's keys or as a deploy key
# of <owner>/<repo_name>. Up to 100 keys are fetched from each list.
# Keys are compared by key type and base64 blob only; the trailing comment
# is ignored, so the same key registered under a different comment still
# counts as registered.
# Arguments: $1 - API base URL, $2 - user, $3 - password, $4 - owner,
#            $5 - repository name, $6 - path to the public key file
# Returns:   0 when the key is registered, non-zero otherwise.
gitea_public_key_registered() {
  local api_base="$1"
  local auth_user="$2"
  local auth_password="$3"
  local owner="$4"
  local repo_name="$5"
  local public_key_path="$6"
  local repo_keys
  local user_keys

  user_keys="$(curl -fsS -u "${auth_user}:${auth_password}" "${api_base}/user/keys?limit=100")"
  repo_keys="$(curl -fsS -u "${auth_user}:${auth_password}" "${api_base}/repos/${owner}/${repo_name}/keys?limit=100")"

  # The key and both JSON documents travel through the environment so no
  # quoting is needed inside the Python program.
  GITEA_PUBLIC_KEY="$(<"${public_key_path}")" \
  GITEA_USER_KEYS="${user_keys}" \
  GITEA_REPO_KEYS="${repo_keys}" \
    python3 - <<'PY'
import json
import os
import sys


def key_material(text):
    """Return (key type, base64 blob) of an OpenSSH public key line."""
    return tuple(text.split()[:2])


wanted = key_material(os.environ["GITEA_PUBLIC_KEY"])
if len(wanted) < 2:
    # Not a usable public key line; it cannot be registered.
    sys.exit(1)

for env_name in ("GITEA_USER_KEYS", "GITEA_REPO_KEYS"):
    for key in json.loads(os.environ[env_name]) or []:
        if key_material(key.get("key", "")) == wanted:
            sys.exit(0)
sys.exit(1)
PY
}
# Register a deploy key on <owner>/<repo_name> by POSTing to the repository
# keys endpoint. The JSON body is produced by Python from environment
# variables; the key is marked read-only only when $8 is exactly "true".
# Arguments: $1 - API base URL, $2 - user, $3 - password, $4 - owner,
#            $5 - repository name, $6 - key title,
#            $7 - path to the public key file, $8 - "true" for read-only
# Returns:   curl's exit status (non-zero on HTTP errors because of -f).
create_gitea_repo_deploy_key() {
  local api_base="$1"
  local auth_user="$2"
  local auth_password="$3"
  local owner="$4"
  local repo_name="$5"
  local title="$6"
  local public_key_path="$7"
  local read_only="$8"
  local payload

  payload="$(
    GITEA_DEPLOY_KEY_TITLE="${title}" \
    GITEA_PUBLIC_KEY="$(<"${public_key_path}")" \
    GITEA_DEPLOY_KEY_READ_ONLY="${read_only}" \
      python3 - <<'PY'
import json
import os

env = os.environ
body = {
    "title": env["GITEA_DEPLOY_KEY_TITLE"],
    "key": env["GITEA_PUBLIC_KEY"].strip(),
    "read_only": env["GITEA_DEPLOY_KEY_READ_ONLY"] == "true",
}
print(json.dumps(body))
PY
  )"

  curl -fsS \
    -u "${auth_user}:${auth_password}" \
    -H "Content-Type: application/json" \
    -X POST \
    -d "${payload}" \
    "${api_base}/repos/${owner}/${repo_name}/keys" >/dev/null
}
# Make sure this host can reach the Gitea repository over SSH:
#   1. create the SSH key pair (or derive the missing public half),
#   2. register the public key as a deploy key unless Gitea already has it,
#   3. record the Gitea host key in ~/.ssh/known_hosts,
#   4. point the "gitea" remote at the SSH URL, set core.sshCommand for the
#      repository and verify access with "git ls-remote".
# Globals:   REPO_ROOT, HOME (read)
# Arguments: $1 - API base URL, $2 - user, $3 - password, $4 - owner,
#            $5 - repository name, $6 - SSH host, $7 - SSH port,
#            $8 - private key path, $9 - deploy key title,
#            $10 - truthy value to register the key read-only
# Exits 1 when the key path is unsafe or only the public half of the key
# exists.
ensure_gitea_repo_ssh_access() {
  local api_base="$1"
  local auth_user="$2"
  local auth_password="$3"
  local owner="$4"
  local repo_name="$5"
  local ssh_host="$6"
  local ssh_port="$7"
  local key_path="$8"
  local key_title="$9"
  local key_read_only="${10}"
  local key_dir
  local known_hosts
  local known_hosts_entry
  local public_key_path
  local read_only_json="false"
  local ssh_repo_url

  # The key path is embedded unquoted in core.sshCommand below, so it must
  # not contain whitespace or single quotes.
  if [[ "${key_path}" =~ [[:space:]] || "${key_path}" == *"'"* ]]; then
    echo "LAB_GITEA_REPO_SSH_KEY_PATH cannot contain whitespace or single quotes." >&2
    exit 1
  fi

  key_dir="$(dirname "${key_path}")"
  public_key_path="${key_path}.pub"
  mkdir -p "${key_dir}"
  chmod 0700 "${key_dir}"

  if [[ ! -s "${key_path}" && ! -s "${public_key_path}" ]]; then
    ssh-keygen -t ed25519 -N "" -f "${key_path}" -C "${key_title}" >/dev/null
  elif [[ -s "${key_path}" && ! -s "${public_key_path}" ]]; then
    ssh-keygen -y -f "${key_path}" >"${public_key_path}"
  elif [[ ! -s "${key_path}" ]]; then
    echo "Public key ${public_key_path} exists, but private key ${key_path} is missing." >&2
    exit 1
  fi
  chmod 0600 "${key_path}"
  chmod 0644 "${public_key_path}"

  if truthy "${key_read_only}"; then
    read_only_json="true"
  fi

  if gitea_public_key_registered "${api_base}" "${auth_user}" "${auth_password}" "${owner}" "${repo_name}" "${public_key_path}"; then
    echo "Gitea already has Debian host SSH key ${public_key_path}."
  else
    create_gitea_repo_deploy_key "${api_base}" "${auth_user}" "${auth_password}" "${owner}" "${repo_name}" "${key_title}" "${public_key_path}" "${read_only_json}"
    echo "Added Debian host SSH key ${public_key_path} to ${owner}/${repo_name}."
  fi

  known_hosts="${HOME}/.ssh/known_hosts"
  # The key may live outside ~/.ssh, so that directory is not guaranteed to
  # exist yet; -m only applies when the directory is newly created.
  mkdir -p -m 0700 "${HOME}/.ssh"
  touch "${known_hosts}"
  chmod 0644 "${known_hosts}"

  # known_hosts stores hosts on the default port as "host" and hosts on any
  # other port as "[host]:port"; look up the form that would be recorded so
  # the host key is not appended again on every run.
  if [[ "${ssh_port}" == "22" ]]; then
    known_hosts_entry="${ssh_host}"
  else
    known_hosts_entry="[${ssh_host}]:${ssh_port}"
  fi
  if ! ssh-keygen -F "${known_hosts_entry}" -f "${known_hosts}" >/dev/null 2>&1; then
    ssh-keyscan -p "${ssh_port}" "${ssh_host}" >>"${known_hosts}" 2>/dev/null
  fi

  ssh_repo_url="ssh://git@${ssh_host}:${ssh_port}/${owner}/${repo_name}.git"
  git -C "${REPO_ROOT}" remote set-url gitea "${ssh_repo_url}" 2>/dev/null ||
    git -C "${REPO_ROOT}" remote add gitea "${ssh_repo_url}"
  git -C "${REPO_ROOT}" config core.sshCommand "ssh -i ${key_path} -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new"
  git -C "${REPO_ROOT}" ls-remote gitea HEAD >/dev/null
  echo "Gitea SSH remote: ${ssh_repo_url}"
}
# Bootstrap the homelab repository on the external Gitea instance:
#   1. resolve (or generate and persist) bootstrap admin credentials,
#   2. over SSH, wait for the Gitea API on the Gitea host and create the
#      admin user if it does not exist,
#   3. create the repository through the API when missing,
#   4. point the local "gitea" remote at the public URL and, when the default
#      branch does not exist yet, push the current HEAD to it over HTTP,
#   5. optionally set up SSH access for this host.
# Globals: REPO_ROOT, HOME, BUILDX_CONFIG (read); LAB_GITEA_* and
#          LAB_RASPBERRY_* (read, optional overrides). Sourcing the
#          credentials file defines GITEA_BOOTSTRAP_* in the current shell.
# Exits 1 on invalid settings, a dirty working tree (unless allowed) or any
# failing step.
bootstrap_gitea_repo() {
  local mode="${LAB_GITEA_REPO_BOOTSTRAP:-true}"
  local gitea_host="${LAB_GITEA_HOST:-${LAB_RASPBERRY_HOST:-192.168.100.89}}"
  local gitea_user="${LAB_GITEA_USER:-${LAB_RASPBERRY_USER:-jv}}"
  local gitea_key="${LAB_GITEA_SSH_KEY_PATH:-${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}}"
  local container_name="${LAB_GITEA_CONTAINER_NAME:-homelab-gitea}"
  local http_port="${LAB_GITEA_HTTP_PORT:-3000}"
  local ssh_port="${LAB_GITEA_SSH_PORT:-32222}"
  local root_url="${LAB_GITEA_ROOT_URL:-https://lab2025.duckdns.org/git/}"
  local repo_owner="${LAB_GITEA_REPO_OWNER:-jv}"
  local repo_name="${LAB_GITEA_REPO_NAME:-my-homelab-configs}"
  local default_branch="${LAB_GITEA_REPO_DEFAULT_BRANCH:-main}"
  local bootstrap_user="${LAB_GITEA_BOOTSTRAP_USER:-${repo_owner}}"
  local bootstrap_email="${LAB_GITEA_BOOTSTRAP_EMAIL:-${bootstrap_user}@homelab.local}"
  local credentials_file="${LAB_GITEA_BOOTSTRAP_CREDENTIALS_FILE:-${HOME}/.config/homelab/gitea-bootstrap.env}"
  local bootstrap_password="${LAB_GITEA_BOOTSTRAP_PASSWORD:-}"
  local allow_dirty="${LAB_GITEA_BOOTSTRAP_ALLOW_DIRTY:-false}"
  local ssh_bootstrap="${LAB_GITEA_REPO_SSH_BOOTSTRAP:-true}"
  local ssh_key_path="${LAB_GITEA_REPO_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}"
  local ssh_key_title="${LAB_GITEA_REPO_DEPLOY_KEY_TITLE:-debian-host-${repo_name}}"
  local ssh_key_read_only="${LAB_GITEA_REPO_DEPLOY_KEY_READ_ONLY:-false}"
  local api_base
  local public_repo_url
  local direct_repo_url
  local push_url
  local askpass
  local credentials_dir
  local remote_status
  local worktree_status
  # Loop variables are local so they do not leak into the global scope.
  local value_name
  local value

  require_debian_server "bootstrap-gitea-repo"
  if disabled_value "${mode}"; then
    return 0
  fi
  ensure_python3

  for value_name in repo_owner repo_name default_branch bootstrap_user; do
    value="${!value_name}"
    if ! [[ "${value}" =~ ^[A-Za-z0-9_.-]+$ ]]; then
      echo "${value_name} contains unsupported characters." >&2
      exit 1
    fi
  done
  if [[ "${bootstrap_email}" == *"'"* ]]; then
    echo "LAB_GITEA_BOOTSTRAP_EMAIL cannot contain a single quote." >&2
    exit 1
  fi
  if ! [[ "${ssh_port}" =~ ^[0-9]+$ ]]; then
    echo "LAB_GITEA_SSH_PORT must be numeric." >&2
    exit 1
  fi
  # The HTTP port is interpolated into the remote readiness probe below.
  if ! [[ "${http_port}" =~ ^[0-9]+$ ]]; then
    echo "LAB_GITEA_HTTP_PORT must be numeric." >&2
    exit 1
  fi

  if [[ -z "${bootstrap_password}" && -r "${credentials_file}" ]]; then
    # shellcheck disable=SC1090
    source "${credentials_file}"
    bootstrap_user="${GITEA_BOOTSTRAP_USER:-${bootstrap_user}}"
    bootstrap_email="${GITEA_BOOTSTRAP_EMAIL:-${bootstrap_email}}"
    bootstrap_password="${GITEA_BOOTSTRAP_PASSWORD:-}"
  fi
  if [[ -z "${bootstrap_password}" ]]; then
    bootstrap_password="$(gitea_bootstrap_password)"
    credentials_dir="$(dirname "${credentials_file}")"
    mkdir -p "${credentials_dir}"
    chmod 0700 "${credentials_dir}"
    {
      printf "GITEA_BOOTSTRAP_USER='%s'\n" "${bootstrap_user}"
      printf "GITEA_BOOTSTRAP_EMAIL='%s'\n" "${bootstrap_email}"
      printf "GITEA_BOOTSTRAP_PASSWORD='%s'\n" "${bootstrap_password}"
    } > "${credentials_file}"
    chmod 0600 "${credentials_file}"
    echo "Generated Gitea bootstrap credentials at ${credentials_file}."
  fi

  # Validate again: sourcing the credentials file may have replaced the
  # bootstrap user, and these values are embedded in single quotes below.
  for value_name in repo_owner repo_name default_branch bootstrap_user; do
    value="${!value_name}"
    if ! [[ "${value}" =~ ^[A-Za-z0-9_.-]+$ ]]; then
      echo "${value_name} contains unsupported characters." >&2
      exit 1
    fi
  done
  for value_name in bootstrap_email bootstrap_password; do
    value="${!value_name}"
    if [[ "${value}" == *"'"* ]]; then
      echo "${value_name} cannot contain a single quote." >&2
      exit 1
    fi
  done

  echo "Bootstrapping Gitea repository ${repo_owner}/${repo_name}..."
  # Remote script: unescaped ${...} are filled in locally, escaped \$... are
  # evaluated on the Gitea host. The readiness probe runs on that host, so it
  # targets the configured HTTP port rather than a fixed 3000.
  # shellcheck disable=SC2087
  ssh -i "${gitea_key}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new "${gitea_user}@${gitea_host}" "bash -s" <<EOF
set -euo pipefail
container_name='${container_name}'
bootstrap_user='${bootstrap_user}'
bootstrap_email='${bootstrap_email}'
bootstrap_password='${bootstrap_password}'
if ! sudo docker inspect "\${container_name}" >/dev/null 2>&1; then
  echo "Gitea container \${container_name} is not running on ${gitea_host}." >&2
  exit 1
fi
for attempt in \$(seq 1 60); do
  if curl -fsS http://127.0.0.1:${http_port}/api/v1/version >/dev/null 2>&1 ||
    curl -fsS http://127.0.0.1:${http_port}/git/api/v1/version >/dev/null 2>&1; then
    break
  fi
  if [ "\${attempt}" = "60" ]; then
    echo "Timed out waiting for Gitea API inside \${container_name}." >&2
    exit 1
  fi
  sleep 2
done
if ! sudo docker exec -u git "\${container_name}" gitea -c /data/gitea/conf/app.ini admin user create \
  --username "\${bootstrap_user}" \
  --password "\${bootstrap_password}" \
  --email "\${bootstrap_email}" \
  --admin \
  --must-change-password=false >/tmp/homelab-gitea-user-create.log 2>&1; then
  if ! sudo docker exec -u git "\${container_name}" gitea -c /data/gitea/conf/app.ini admin user list | awk -v user="\${bootstrap_user}" 'NR > 1 && \$2 == user { found = 1 } END { exit found ? 0 : 1 }'; then
    cat /tmp/homelab-gitea-user-create.log >&2
    exit 1
  fi
fi
EOF

  api_base="$(gitea_api_base_url "${gitea_host}" "${http_port}")"
  if gitea_repo_exists "${api_base}" "${bootstrap_user}" "${bootstrap_password}" "${repo_owner}" "${repo_name}"; then
    echo "Gitea repository ${repo_owner}/${repo_name} already exists."
  else
    if [[ "${repo_owner}" != "${bootstrap_user}" ]]; then
      echo "Gitea repository owner ${repo_owner} does not exist yet; only user-owned bootstrap repos are supported." >&2
      exit 1
    fi
    create_gitea_repo "${api_base}" "${bootstrap_user}" "${bootstrap_password}" "${repo_name}" "${default_branch}"
    echo "Created Gitea repository ${repo_owner}/${repo_name}."
  fi

  public_repo_url="${root_url%/}/${repo_owner}/${repo_name}.git"
  if [[ "${api_base}" == */git/api/v1 ]]; then
    direct_repo_url="http://${gitea_host}:${http_port}/git/${repo_owner}/${repo_name}.git"
  else
    direct_repo_url="http://${gitea_host}:${http_port}/${repo_owner}/${repo_name}.git"
  fi
  push_url="${LAB_GITEA_BOOTSTRAP_PUSH_URL:-${direct_repo_url}}"

  git -C "${REPO_ROOT}" rev-parse --is-inside-work-tree >/dev/null
  git -C "${REPO_ROOT}" remote set-url gitea "${public_repo_url}" 2>/dev/null ||
    git -C "${REPO_ROOT}" remote add gitea "${public_repo_url}"

  if gitea_branch_exists "${api_base}" "${bootstrap_user}" "${bootstrap_password}" "${repo_owner}" "${repo_name}" "${default_branch}"; then
    echo "Gitea branch ${default_branch} already exists; leaving existing history unchanged."
  else
    worktree_status="$(git -C "${REPO_ROOT}" status --porcelain)"
    if [[ -n "${worktree_status}" ]] && ! truthy "${allow_dirty}"; then
      echo "Refusing to seed Gitea from a dirty working tree; commit or stash changes first." >&2
      echo "Set LAB_GITEA_BOOTSTRAP_ALLOW_DIRTY=true to push committed HEAD anyway." >&2
      exit 1
    fi
    # Temporary askpass helper so the password is not placed in the push URL;
    # the EXIT trap also covers it until it has been removed again.
    askpass="$(mktemp)"
    trap 'rm -f "${askpass}" "${BUILDX_CONFIG}"' EXIT
    cat > "${askpass}" <<ASKPASS_EOT
#!/usr/bin/env bash
case "\$1" in
*Username*) printf '%s\n' '${bootstrap_user}' ;;
*Password*) printf '%s\n' '${bootstrap_password}' ;;
*) printf '\n' ;;
esac
ASKPASS_EOT
    chmod 0700 "${askpass}"
    GIT_ASKPASS="${askpass}" GIT_TERMINAL_PROMPT=0 \
      git -C "${REPO_ROOT}" push "${push_url}" "HEAD:refs/heads/${default_branch}"
    rm -f "${askpass}"
    trap 'rm -f "${BUILDX_CONFIG}"' EXIT
    echo "Pushed current HEAD to Gitea branch ${default_branch}."
  fi

  remote_status="$(git -C "${REPO_ROOT}" remote get-url gitea)"
  echo "Gitea remote: ${remote_status}"

  if ! disabled_value "${ssh_bootstrap}"; then
    ensure_gitea_repo_ssh_access \
      "${api_base}" \
      "${bootstrap_user}" \
      "${bootstrap_password}" \
      "${repo_owner}" \
      "${repo_name}" \
      "${gitea_host}" \
      "${ssh_port}" \
      "${ssh_key_path}" \
      "${ssh_key_title}" \
      "${ssh_key_read_only}"
  fi
}
install_gitea_backup_timer() {
local gitea_host="${LAB_GITEA_HOST:-${LAB_RASPBERRY_HOST:-192.168.100.89}}"
local gitea_user="${LAB_GITEA_USER:-${LAB_RASPBERRY_USER:-jv}}"
local gitea_key="${LAB_GITEA_SSH_KEY_PATH:-${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}}"
local gitea_container="${LAB_GITEA_CONTAINER_NAME:-homelab-gitea}"
local backup_dir="${LAB_GITEA_BACKUP_DIR:-/home/jv/backups/gitea}"
local backup_script="/usr/local/sbin/homelab-gitea-backup.sh"
local restore_drill_script="/usr/local/sbin/homelab-gitea-restore-drill.sh"
sudo tee "${backup_script}" >/dev/null <<BACKUP_SCRIPT_EOT
#!/usr/bin/env bash
set -euo pipefail
GITEA_HOST="\${GITEA_HOST:-${gitea_host}}"
GITEA_USER="\${GITEA_USER:-${gitea_user}}"
GITEA_SSH_KEY_PATH="\${GITEA_SSH_KEY_PATH:-${gitea_key}}"
GITEA_CONTAINER="\${GITEA_CONTAINER:-${gitea_container}}"
GITEA_BACKUP_DIR="\${GITEA_BACKUP_DIR:-${backup_dir}}"
GITEA_BACKUP_RETENTION_DAYS="\${GITEA_BACKUP_RETENTION_DAYS:-30}"
REMOTE_ARCHIVE="/tmp/homelab-gitea-dump.zip"
timestamp="\$(date -u +%Y%m%dT%H%M%SZ)"
tmp_archive="\$(mktemp "/tmp/gitea-\${timestamp}.XXXXXX.zip")"
backup_archive="\${GITEA_BACKUP_DIR}/gitea-\${timestamp}.zip"
remote_host_archive="/tmp/gitea-\${timestamp}.zip"
ssh_gitea() {
ssh -i "\${GITEA_SSH_KEY_PATH}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new "\${GITEA_USER}@\${GITEA_HOST}" "\$@"
}
cleanup() {
rm -f "\${tmp_archive}"
ssh_gitea "rm -f '\${remote_host_archive}'; sudo docker exec -u git '\${GITEA_CONTAINER}' rm -f '\${REMOTE_ARCHIVE}' >/dev/null 2>&1 || true" >/dev/null 2>&1 || true
}
trap cleanup EXIT
ssh_gitea "set -eu
sudo docker exec -u git '\${GITEA_CONTAINER}' rm -f '\${REMOTE_ARCHIVE}' >/dev/null 2>&1 || true
sudo docker exec -u git '\${GITEA_CONTAINER}' sh -c 'mkdir -p /data/git/repositories'
sudo docker exec -u git '\${GITEA_CONTAINER}' gitea dump -c /data/gitea/conf/app.ini --file '\${REMOTE_ARCHIVE}'
sudo docker cp '\${GITEA_CONTAINER}:\${REMOTE_ARCHIVE}' '\${remote_host_archive}'
sudo chown '\${GITEA_USER}:\${GITEA_USER}' '\${remote_host_archive}'
sudo docker exec -u git '\${GITEA_CONTAINER}' rm -f '\${REMOTE_ARCHIVE}' >/dev/null 2>&1 || true"
scp -i "\${GITEA_SSH_KEY_PATH}" -o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new \
"\${GITEA_USER}@\${GITEA_HOST}:\${remote_host_archive}" "\${tmp_archive}"
sudo mkdir -p "\${GITEA_BACKUP_DIR}"
sudo chown jv:jv "\${GITEA_BACKUP_DIR}"
sudo install -m 0640 -o jv -g jv "\${tmp_archive}" "\${backup_archive}"
sudo find "\${GITEA_BACKUP_DIR}" -type f -name 'gitea-*.zip' -mtime +"\${GITEA_BACKUP_RETENTION_DAYS}" -delete
echo "Created \${backup_archive}"
BACKUP_SCRIPT_EOT
sudo chmod 0755 "${backup_script}"
sudo tee /etc/systemd/system/homelab-gitea-backup.service >/dev/null <<'SERVICE_EOT'
[Unit]
Description=Back up external Homelab Gitea to Debian host storage
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
ExecStart=/usr/local/sbin/homelab-gitea-backup.sh
SERVICE_EOT
sudo tee /etc/systemd/system/homelab-gitea-backup.timer >/dev/null <<'TIMER_EOT'
[Unit]
Description=Run daily Homelab Gitea backups
[Timer]
OnCalendar=*-*-* 02:35:00
RandomizedDelaySec=20m
Persistent=true
[Install]
WantedBy=timers.target
TIMER_EOT
sudo tee "${restore_drill_script}" >/dev/null <<'RESTORE_DRILL_SCRIPT_EOT'
#!/usr/bin/env bash
set -euo pipefail
GITEA_BACKUP_DIR="${GITEA_BACKUP_DIR:-/home/jv/backups/gitea}"
GITEA_RESTORE_DRILL_DIR="${GITEA_RESTORE_DRILL_DIR:-/home/jv/backups/gitea-restore-drills}"
GITEA_RESTORE_DRILL_RETENTION_DAYS="${GITEA_RESTORE_DRILL_RETENTION_DAYS:-90}"
if ! command -v python3 >/dev/null 2>&1; then
echo "python3 is required for Gitea restore drills." >&2
exit 1
fi
latest_archive="$(
{ find "${GITEA_BACKUP_DIR}" -maxdepth 1 -type f -name 'gitea-*.zip' -printf '%T@ %p\n' 2>/dev/null || true; } |
sort -nr |
awk 'NR == 1 { sub(/^[^ ]+ /, ""); print }'
)"
if [[ -z "${latest_archive}" ]]; then
echo "Skipping Gitea restore drill: no backup archive found in ${GITEA_BACKUP_DIR}."
exit 0
fi
timestamp="$(date -u +%Y%m%dT%H%M%SZ)"
tmp_dir="$(mktemp -d "/tmp/gitea-restore-drill-${timestamp}.XXXXXX")"
tmp_report="$(mktemp "/tmp/gitea-restore-drill-${timestamp}.XXXXXX.txt")"
report_path="${GITEA_RESTORE_DRILL_DIR}/gitea-restore-drill-${timestamp}.txt"
cleanup() {
rm -rf "${tmp_dir}"
rm -f "${tmp_report}"
}
trap cleanup EXIT
python3 - "${latest_archive}" "${tmp_dir}" "${tmp_report}" <<'PY'
import os
import sys
import zipfile
archive_path, extract_dir, report_path = sys.argv[1:4]
with zipfile.ZipFile(archive_path) as archive:
bad_member = archive.testzip()
if bad_member:
raise SystemExit(f"ZIP integrity check failed at {bad_member}")
members = archive.infolist()
if not members:
raise SystemExit("ZIP archive is empty")
extract_root = os.path.abspath(extract_dir)
for member in members:
target = os.path.abspath(os.path.join(extract_root, member.filename))
if target != extract_root and not target.startswith(extract_root + os.sep):
raise SystemExit(f"Unsafe archive path: {member.filename}")
archive.extractall(extract_root)
file_count = 0
total_bytes = 0
for root, _, files in os.walk(extract_dir):
for name in files:
file_count += 1
total_bytes += os.path.getsize(os.path.join(root, name))
if file_count == 0:
raise SystemExit("Archive extracted no files")
with open(report_path, "w", encoding="utf-8") as handle:
handle.write("Gitea restore drill report\n")
handle.write(f"archive={archive_path}\n")
handle.write(f"archive_size_bytes={os.path.getsize(archive_path)}\n")
handle.write(f"extracted_files={file_count}\n")
handle.write(f"extracted_bytes={total_bytes}\n")
handle.write("result=ok\n")
PY
sudo mkdir -p "${GITEA_RESTORE_DRILL_DIR}"
sudo install -m 0640 -o root -g root "${tmp_report}" "${report_path}"
sudo find "${GITEA_RESTORE_DRILL_DIR}" -type f -name 'gitea-restore-drill-*.txt' -mtime +"${GITEA_RESTORE_DRILL_RETENTION_DAYS}" -delete
echo "Created ${report_path}"
RESTORE_DRILL_SCRIPT_EOT
sudo chmod 0755 "${restore_drill_script}"
sudo tee /etc/systemd/system/homelab-gitea-restore-drill.service >/dev/null <<'RESTORE_DRILL_SERVICE_EOT'
[Unit]
Description=Run a non-destructive Gitea backup restore drill
After=network-online.target homelab-gitea-backup.service
Wants=network-online.target
[Service]
Type=oneshot
ExecStart=/usr/local/sbin/homelab-gitea-restore-drill.sh
RESTORE_DRILL_SERVICE_EOT
sudo tee /etc/systemd/system/homelab-gitea-restore-drill.timer >/dev/null <<'RESTORE_DRILL_TIMER_EOT'
[Unit]
Description=Run monthly Homelab Gitea restore drills
[Timer]
OnCalendar=monthly
RandomizedDelaySec=2h
Persistent=true
[Install]
WantedBy=timers.target
RESTORE_DRILL_TIMER_EOT
sudo systemctl daemon-reload
sudo systemctl enable --now homelab-gitea-backup.timer >/dev/null
sudo systemctl enable --now homelab-gitea-restore-drill.timer >/dev/null
}
# Take an on-demand Gitea backup on the Debian homelab server.
# Re-runs install_gitea_backup_timer first, then executes the installed
# backup script as root.
backup_gitea() {
  local -r backup_script="/usr/local/sbin/homelab-gitea-backup.sh"

  require_debian_server "backup-gitea"
  install_gitea_backup_timer
  sudo "${backup_script}"
}
# Run an on-demand Gitea restore drill on the Debian homelab server.
# Re-runs install_gitea_backup_timer first, then executes the installed
# restore-drill script as root.
drill_gitea_restore() {
  local -r drill_script="/usr/local/sbin/homelab-gitea-restore-drill.sh"

  require_debian_server "drill-gitea-restore"
  install_gitea_backup_timer
  sudo "${drill_script}"
}
#######################################
# Install the Gitea Actions runner (act_runner), register it if no
# registration exists yet, and run it as the homelab-gitea-runner systemd
# service.
# Globals (read, all optional; defaults are set below):
#   GITEA_RUNNER_HOME, GITEA_RUNNER_INSTANCE_URL, GITEA_RUNNER_LABELS,
#   GITEA_RUNNER_NAME, GITEA_RUNNER_REGISTRATION_TOKEN, GITEA_RUNNER_USER,
#   GITEA_ACT_RUNNER_VERSION
# Arguments:
#   $1 - registration token; only used when GITEA_RUNNER_REGISTRATION_TOKEN
#        is unset or empty (optional)
# Exits:
#   1 on an unsupported architecture, a failed download or install of the
#   runner binary, or when registration is needed but no token was supplied.
#######################################
install_gitea_runner() {
  local runner_arch
  local runner_home="${GITEA_RUNNER_HOME:-/home/jv/.local/share/gitea-runner/my-homelab-configs}"
  local runner_instance="${GITEA_RUNNER_INSTANCE_URL:-https://lab2025.duckdns.org/git/}"
  local runner_labels="${GITEA_RUNNER_LABELS:-homelab-debian:host}"
  local runner_name="${GITEA_RUNNER_NAME:-homelab-debian-my-homelab-configs}"
  local runner_token="${GITEA_RUNNER_REGISTRATION_TOKEN:-${1:-}}"
  local runner_user="${GITEA_RUNNER_USER:-jv}"
  local runner_version="${GITEA_ACT_RUNNER_VERSION:-0.2.11}"
  local runner_binary="/usr/local/bin/act_runner"
  local runner_download
  local missing_packages=()
  local package

  require_debian_server "install-gitea-runner"

  # Map the Debian architecture name onto the release asset suffix.
  case "$(dpkg --print-architecture)" in
    amd64)
      runner_arch="linux-amd64"
      ;;
    arm64)
      runner_arch="linux-arm64"
      ;;
    *)
      echo "Unsupported Debian architecture: $(dpkg --print-architecture)" >&2
      exit 1
      ;;
  esac

  # Only touch apt when at least one required package is not installed.
  for package in ca-certificates curl git nodejs python3; do
    if ! dpkg-query -W -f='${Status}' "${package}" 2>/dev/null | grep -q "install ok installed"; then
      missing_packages+=("${package}")
    fi
  done
  if [[ ${#missing_packages[@]} -gt 0 ]]; then
    sudo apt-get update
    sudo apt-get install -y --no-install-recommends "${missing_packages[@]}"
  fi

  # Download to a scratch file and move the result into place with a rename.
  # Writing straight into ${runner_binary} would leave a truncated executable
  # behind when the download fails, and can fail with "Text file busy" while
  # the runner service is executing the old binary.
  runner_download="$(mktemp /tmp/act_runner.XXXXXX)"
  if ! curl -fsSL \
    -o "${runner_download}" \
    "https://gitea.com/gitea/act_runner/releases/download/v${runner_version}/act_runner-${runner_version}-${runner_arch}"; then
    rm -f "${runner_download}"
    echo "Failed to download act_runner v${runner_version} (${runner_arch})." >&2
    exit 1
  fi
  if ! sudo install -m 0755 -o root -g root "${runner_download}" "${runner_binary}.new" ||
    ! sudo mv -f "${runner_binary}.new" "${runner_binary}"; then
    rm -f "${runner_download}"
    sudo rm -f "${runner_binary}.new"
    echo "Failed to install ${runner_binary}." >&2
    exit 1
  fi
  rm -f "${runner_download}"

  sudo -u "${runner_user}" mkdir -p "${runner_home}"

  # An existing .runner file means the runner is already registered; keep it.
  if [[ ! -f "${runner_home}/.runner" ]]; then
    if [[ -z "${runner_token}" ]]; then
      echo "Set GITEA_RUNNER_REGISTRATION_TOKEN to the repository-level runner token from Gitea." >&2
      exit 1
    fi
    # Values are handed over through the environment and expanded by the inner
    # shell, so they are never spliced into the command string itself.
    sudo -u "${runner_user}" env \
      HOME="/home/${runner_user}" \
      GITEA_RUNNER_HOME="${runner_home}" \
      GITEA_RUNNER_INSTANCE_URL="${runner_instance}" \
      GITEA_RUNNER_REGISTRATION_TOKEN="${runner_token}" \
      GITEA_RUNNER_NAME="${runner_name}" \
      GITEA_RUNNER_LABELS="${runner_labels}" \
      bash -lc 'cd "${GITEA_RUNNER_HOME}" && /usr/local/bin/act_runner register --no-interactive --instance "${GITEA_RUNNER_INSTANCE_URL}" --token "${GITEA_RUNNER_REGISTRATION_TOKEN}" --name "${GITEA_RUNNER_NAME}" --labels "${GITEA_RUNNER_LABELS}"'
  else
    echo "Existing runner registration found at ${runner_home}/.runner; keeping it."
  fi

  # Unquoted heredoc delimiter: ${runner_user} and ${runner_home} are expanded
  # into the unit file at install time.
  sudo tee /etc/systemd/system/homelab-gitea-runner.service >/dev/null <<SERVICE_EOT
[Unit]
Description=Homelab Gitea Actions runner for my-homelab-configs
After=network-online.target docker.service
Wants=network-online.target
[Service]
Type=simple
User=${runner_user}
Group=${runner_user}
WorkingDirectory=${runner_home}
Environment=HOME=/home/${runner_user}
Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ExecStart=/usr/local/bin/act_runner daemon
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
SERVICE_EOT

  sudo systemctl daemon-reload
  # NOTE(review): "enable --now" starts the unit when it is not running; an
  # already-running runner keeps executing the previous binary until it is
  # restarted. Confirm whether an explicit restart is wanted here.
  sudo systemctl enable --now homelab-gitea-runner.service >/dev/null
  sudo systemctl status homelab-gitea-runner.service --no-pager -l
}
# Delete the pods matching a label selector and wait (up to 120s) for the
# deletion to finish. The function itself only deletes; replacements are
# expected to come from whatever controller owns the pods.
# Arguments:
#   $1 - namespace to delete pods in
#   $2 - label selector passed to kubectl -l
#   $3 - Argo CD application name, used only for the debug dump on failure
# Globals:
#   KUBECONFIG (read)
# On failure: prints an error, dumps debug output, and exits 1.
recreate_pods_for_selector() {
  local namespace="$1"
  local selector="$2"
  local app="$3"
  local -a delete_cmd=(
    kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}"
    delete pod -l "${selector}"
    --ignore-not-found --wait=true --timeout=120s
  )

  if "${delete_cmd[@]}"; then
    return 0
  fi

  echo "Failed to recreate pods matching ${selector} in namespace ${namespace}" >&2
  dump_argocd_debug "${app}"
  dump_namespace_debug "${namespace}"
  exit 1
}
# Request a hard refresh of one Argo CD Application by merge-patching the
# argocd.argoproj.io/refresh annotation onto it in the argocd namespace.
# kubectl's stdout is discarded.
# Arguments:
#   $1 - Application name
# Globals:
#   KUBECONFIG (read)
refresh_argocd_application() {
  local app="$1"
  local -r refresh_patch='{"metadata":{"annotations":{"argocd.argoproj.io/refresh":"hard"}}}'
  local -a patch_cmd=(
    kubectl --kubeconfig "${KUBECONFIG}"
    patch application "${app}"
    -n argocd
    --type merge
    -p "${refresh_patch}"
  )

  "${patch_cmd[@]}" >/dev/null
}
# Deploy the homelab applications end to end:
#   1. apply the bootstrap/apps OpenTofu stack and refresh the Argo CD apps,
#   2. wait for the container-registry deployment,
#   3. build and push the website and demos images unless the *_image_is_current
#      helpers report them as current,
#   4. delete pods for rebuilt images, wait for each deployment to be ready,
#      and then record the new image state.
# Globals read (optional): DEMOS_IMAGE_PLATFORMS, WEBSITE_IMAGE_PLATFORMS,
#   TF_VAR_registry_endpoint, TF_VAR_kubeconfig_path, KUBECONFIG_PATH, REPO_ROOT.
# Globals exported: TF_VAR_registry_endpoint, TF_VAR_kubeconfig_path, KUBECONFIG.
# Exits 1 when the registry endpoints disagree.
apps() {
local buildx_builder_ready=false
local demos_image_built=false
local demos_image_ref
local demos_image_state_file
local demos_platforms
local demos_registry_endpoint
local demos_source_hash
local registry_endpoint
local website_image_built=false
local website_image_ref
local website_image_state_file
local website_platforms
local website_source_hash
require_debian_server "apps"
# Resolve registry endpoints, image references, state-file paths, target
# platforms and source hashes. The *_registry_endpoint and *_source_hash
# helpers are defined elsewhere in this file; the local variables that share
# their names do not shadow them (variables and functions are separate
# namespaces in bash).
registry_endpoint="$(website_registry_endpoint)"
demos_registry_endpoint="$(demos_registry_endpoint)"
demos_image_ref="${registry_endpoint}/demos-static:latest"
demos_image_state_file="${REPO_ROOT}/.lab/demos-static-image.state"
demos_platforms="${DEMOS_IMAGE_PLATFORMS:-linux/arm64}"
demos_source_hash="$(demos_source_hash)"
website_image_ref="${registry_endpoint}/php-website:latest"
website_image_state_file="${REPO_ROOT}/.lab/php-website-image.state"
website_platforms="${WEBSITE_IMAGE_PLATFORMS:-linux/arm64}"
website_source_hash="$(website_source_hash)"
# Export settings for OpenTofu (TF_VAR_*) and kubectl (KUBECONFIG), keeping any
# TF_VAR_* values that are already present in the environment.
export TF_VAR_registry_endpoint="${TF_VAR_registry_endpoint:-${registry_endpoint}}"
export TF_VAR_kubeconfig_path="${TF_VAR_kubeconfig_path:-${KUBECONFIG_PATH}}"
export KUBECONFIG="${TF_VAR_kubeconfig_path}"
# Refuse to continue when a pre-set TF_VAR_registry_endpoint, or the demos
# registry endpoint, differs from the website registry endpoint.
if [[ "${TF_VAR_registry_endpoint}" != "${registry_endpoint}" ]]; then
echo "TF_VAR_registry_endpoint must match apps/website/web-app.yaml (${registry_endpoint})" >&2
exit 1
fi
if [[ "${demos_registry_endpoint}" != "${registry_endpoint}" ]]; then
echo "apps/demos-static/web-app.yaml registry endpoint (${demos_registry_endpoint}) must match apps/website/web-app.yaml (${registry_endpoint})" >&2
exit 1
fi
echo "Deploying homelab applications..."
run_tofu_stack "bootstrap/apps"
refresh_argocd_application container-registry
refresh_argocd_application demos-static
refresh_argocd_application website-production
# Wait for the local-registry deployment before any image build is attempted
# (presumably because it is the push target behind registry_endpoint).
wait_for_namespace container-registry container-registry 300
wait_for_namespaced_resource container-registry deployment local-registry container-registry 300
wait_for_deployment_ready container-registry local-registry container-registry 300
# Website image: skip the build when website_image_is_current succeeds.
if website_image_is_current "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}" "${registry_endpoint}"; then
echo "Website image ${website_image_ref} is already current (${website_source_hash}); skipping build."
else
echo "Building website image ${website_image_ref} for ${website_platforms} (${website_source_hash})..."
ensure_docker_build_space
# The buildx builder is prepared at most once per run; the flag is shared with
# the demos build branch below.
if [[ "${buildx_builder_ready}" != "true" ]]; then
prepare_buildx_builder "${registry_endpoint}"
buildx_builder_ready=true
fi
docker buildx build \
--network host \
--platform "${website_platforms}" \
--provenance=false \
--sbom=false \
--label "dev.homelab.website.source-hash=${website_source_hash}" \
-t "${website_image_ref}" \
-f "${REPO_ROOT}/apps/website/Dockerfile" \
"${REPO_ROOT}/apps/website/" \
--push
website_image_built=true
fi
# Demos image: same flow as the website image.
if demos_image_is_current "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}" "${registry_endpoint}"; then
echo "Demos image ${demos_image_ref} is already current (${demos_source_hash}); skipping build."
else
echo "Building demos image ${demos_image_ref} for ${demos_platforms} (${demos_source_hash})..."
ensure_docker_build_space
if [[ "${buildx_builder_ready}" != "true" ]]; then
prepare_buildx_builder "${registry_endpoint}"
buildx_builder_ready=true
fi
docker buildx build \
--network host \
--platform "${demos_platforms}" \
--provenance=false \
--sbom=false \
--label "dev.homelab.demos.source-hash=${demos_source_hash}" \
-t "${demos_image_ref}" \
-f "${REPO_ROOT}/apps/demos-static/Dockerfile" \
"${REPO_ROOT}/apps/demos-static/" \
--push
demos_image_built=true
fi
# Roll out the website. Pods are deleted only when a new image was pushed in
# this run (the tag is always :latest, so the reference itself never changes).
refresh_argocd_application website-production
wait_for_namespace website-production website-production 300
wait_for_namespaced_resource website-production deployment php-website-deployment website-production 300
if [[ "${website_image_built}" == "true" ]]; then
recreate_pods_for_selector website-production app=php-website website-production
else
echo "Skipping website pod restart because the image did not change."
fi
wait_for_deployment_ready website-production php-website-deployment website-production 300
# The image state is written only after the readiness wait above has returned.
if [[ "${website_image_built}" == "true" ]]; then
write_website_image_state "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}"
fi
# Roll out the demos site with the same sequence.
refresh_argocd_application demos-static
wait_for_namespace demos-static demos-static 300
wait_for_namespaced_resource demos-static deployment demos-static demos-static 300
if [[ "${demos_image_built}" == "true" ]]; then
recreate_pods_for_selector demos-static app=demos-static demos-static
else
echo "Skipping demos pod restart because the image did not change."
fi
wait_for_deployment_ready demos-static demos-static demos-static 300
if [[ "${demos_image_built}" == "true" ]]; then
write_demos_image_state "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}"
fi
echo "Application deployment successfully completed."
}
# Full deployment: Gitea and its repository bootstrap first, then the cluster
# and platform OpenTofu stacks, the applications, and finally the edge stack.
up() {
  local stack

  require_debian_server "up"
  printf '%s\n' "Deploying the homelab infrastructure..."

  deploy_gitea
  bootstrap_gitea_repo

  for stack in bootstrap/cluster bootstrap/platform; do
    run_tofu_stack "${stack}"
  done
  apps
  run_tofu_stack "bootstrap/edge"

  printf '%s\n' "Deployment successfully completed."
}
# Tear the cluster down with nuke and bring it back up (cluster, platform,
# apps, edge) without running the Gitea deployment steps.
# Exports LAB_INCLUDE_RASPBERRY_WORKER (default "false") and
# WORKER_SSH_TARGETS (default empty), keeping values already set by the caller.
rebuild_cluster() {
  local stack

  require_debian_server "rebuild-cluster"

  # Fill in defaults only when the caller left the variables unset or empty.
  : "${LAB_INCLUDE_RASPBERRY_WORKER:=false}"
  : "${WORKER_SSH_TARGETS:=}"
  export LAB_INCLUDE_RASPBERRY_WORKER WORKER_SSH_TARGETS

  printf '%s\n' "Rebuilding the Kubernetes cluster without touching external Raspberry Pi Gitea..."
  nuke

  for stack in bootstrap/cluster bootstrap/platform; do
    run_tofu_stack "${stack}"
  done
  apps
  run_tofu_stack "bootstrap/edge"

  printf '%s\n' "Cluster rebuild successfully completed."
}
# Destroy the Kubernetes installation and the local OpenTofu state.
# Sequence:
#   1. kill local tofu/terraform processes,
#   2. clean the local node (cleanup_node) and remove the kubeconfig,
#   3. run the embedded cleanup script over ssh on every target listed in
#      WORKER_SSH_TARGETS (whitespace-separated),
#   4. remove the lab-builder buildx builder and its config file,
#   5. delete state, lock files and .terraform/ for the cluster, platform,
#      apps and edge stacks under ${REPO_ROOT}/bootstrap.
# Globals read: WORKER_SSH_TARGETS (optional), KUBECONFIG_PATH, BUILDX_CONFIG,
#   REPO_ROOT.
# Exits 1 before any OpenTofu state is deleted when a remote cleanup fails;
# the local node cleanup in step 2 has already happened by then.
nuke() {
local worker_ssh_targets
local worker_targets
local target
require_debian_server "nuke"
echo "Brutally nuking the homelab infrastructure..."
# "${VAR-}" expands to an empty string when WORKER_SSH_TARGETS is unset, which
# keeps "set -u" from aborting; read -a then splits the list into an array.
worker_ssh_targets="${WORKER_SSH_TARGETS-}"
read -r -a worker_targets <<< "${worker_ssh_targets}"
echo "--> Terminating local OpenTofu tasks..."
# Best effort: killall fails when no such process exists.
killall tofu terraform 2>/dev/null || true
echo "--> Eviscerating local Kubernetes components..."
cleanup_node
sudo rm -f "${KUBECONFIG_PATH}"
# The quoted 'EOF' delimiter sends the script below to the remote bash
# verbatim: nothing in it is expanded on this machine.
for target in "${worker_targets[@]}"; do
echo "--> Eviscerating remote Kubernetes components (${target})..."
if ! ssh -o ConnectTimeout=5 "${target}" "bash -s" <<'EOF'
set -euo pipefail
cleanup_calico_links() {
ip link show | awk -F: '/^[0-9]+: cali/ {print $2}' | cut -d@ -f1 | xargs -r -n1 sudo ip link delete 2>/dev/null || true
sudo ip link delete vxlan.calico 2>/dev/null || true
sudo ip link delete tunl0 2>/dev/null || true
sudo ip link delete cni0 2>/dev/null || true
sudo ip link delete kube-ipvs0 2>/dev/null || true
ip netns list | awk '/^(cni-|calico)/ {print $1}' | xargs -r -n1 sudo ip netns delete 2>/dev/null || true
}
cleanup_iptables() {
sudo iptables -F || true
sudo iptables -X || true
sudo iptables -t nat -F || true
sudo iptables -t nat -X || true
sudo iptables -t mangle -F || true
sudo iptables -t mangle -X || true
sudo iptables -t raw -F || true
sudo iptables -t raw -X || true
if command -v ipvsadm >/dev/null 2>&1; then
sudo ipvsadm --clear || true
fi
}
cleanup_calico_runtime_files() {
local path
for path in /run/calico /var/run/calico; do
if sudo test -e "${path}"; then
sudo find "${path}" -path '*/cgroup*' -prune -o -mindepth 1 -exec rm -rf -- {} + 2>/dev/null || true
sudo rmdir "${path}" 2>/dev/null || true
fi
done
}
restore_node_dns() {
sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf
if sudo test -e /etc/resolv.conf.homelab-k8s-backup; then
sudo rm -f /etc/resolv.conf
sudo mv /etc/resolv.conf.homelab-k8s-backup /etc/resolv.conf
fi
sudo systemctl restart systemd-resolved 2>/dev/null || true
}
cleanup_mounts() {
if command -v findmnt >/dev/null 2>&1; then
local mount_root
while IFS= read -r mountpoint; do
sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
done < <(
for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do
findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true
done | sort -ru
)
fi
while IFS= read -r mountpoint; do
sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
done < <(find /var/lib/kubelet/pods -mindepth 2 -maxdepth 5 -type d 2>/dev/null || true)
sudo umount -f /var/lib/containerd/srun/* 2>/dev/null || sudo umount -l /var/lib/containerd/srun/* 2>/dev/null || true
}
sudo kubeadm reset --force || true
sudo systemctl stop kubelet 2>/dev/null || true
sudo systemctl stop containerd 2>/dev/null || true
sudo killall containerd-shim-runc-v2 2>/dev/null || true
cleanup_mounts
sudo rm -rf \
/etc/kubernetes/ \
/var/lib/etcd/ \
/var/lib/kubelet/ \
/var/lib/cni/ \
/etc/cni/net.d \
/run/flannel \
/var/lib/calico \
/var/log/calico \
/var/lib/containerd/* \
/run/containerd/* \
/etc/containerd/certs.d \
/etc/containerd/config.toml
cleanup_calico_runtime_files
sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam
cleanup_iptables
cleanup_calico_links
restore_node_dns
sudo mkdir -p /etc/containerd/certs.d
sudo systemctl reset-failed kubelet containerd 2>/dev/null || true
sudo systemctl start containerd 2>/dev/null || true
EOF
then
echo "Remote cleanup failed for ${target}; not deleting OpenTofu state." >&2
exit 1
fi
done
# Best effort: the builder or its container may not exist.
docker buildx rm lab-builder 2>/dev/null || true
docker rm -f buildx_buildkit_lab-builder0 2>/dev/null || true
rm -f "${BUILDX_CONFIG}" || true
echo "--> Deleting OpenTofu tracking state files..."
# The trailing * sits outside the quotes so the shell globs the state file
# together with any suffixed siblings that share its prefix.
rm -rf "${REPO_ROOT}"/bootstrap/cluster/terraform.tfstate*
rm -f "${REPO_ROOT}"/bootstrap/cluster/.terraform.tfstate.lock.info
rm -rf "${REPO_ROOT}"/bootstrap/cluster/.terraform/
rm -rf "${REPO_ROOT}"/bootstrap/platform/terraform.tfstate*
rm -f "${REPO_ROOT}"/bootstrap/platform/.terraform.tfstate.lock.info
rm -rf "${REPO_ROOT}"/bootstrap/platform/.terraform/
rm -rf "${REPO_ROOT}"/bootstrap/apps/terraform.tfstate*
rm -f "${REPO_ROOT}"/bootstrap/apps/.terraform.tfstate.lock.info
rm -rf "${REPO_ROOT}"/bootstrap/apps/.terraform/
rm -rf "${REPO_ROOT}"/bootstrap/edge/terraform.tfstate*
rm -f "${REPO_ROOT}"/bootstrap/edge/.terraform.tfstate.lock.info
rm -rf "${REPO_ROOT}"/bootstrap/edge/.terraform/
echo "Destruction complete. Retained data under /var/openebs/local was left intact."
}
# Command dispatcher: the first CLI argument selects the subcommand. Only
# install-gitea-runner receives a further argument (the optional registration
# token). Anything else prints the usage line and exits 1.
case "${1:-}" in
  up) up ;;
  rebuild-cluster) rebuild_cluster ;;
  apps) apps ;;
  deploy-gitea) deploy_gitea ;;
  bootstrap-gitea-repo) bootstrap_gitea_repo ;;
  backup-gitea) backup_gitea ;;
  drill-gitea-restore) drill_gitea_restore ;;
  install-gitea-runner) install_gitea_runner "${2:-}" ;;
  nuke) nuke ;;
  *)
    echo "Usage: $0 {up|rebuild-cluster|apps|deploy-gitea|bootstrap-gitea-repo|backup-gitea|drill-gitea-restore|install-gitea-runner|nuke}"
    exit 1
    ;;
esac