#!/usr/bin/env bash
#
# Homelab lifecycle helper.
# Usage: <script> {up|backup-gitea|install-gitea-runner|nuke}
#
# Environment:
#   KUBECONFIG_PATH / TF_VAR_kubeconfig_path  kubeconfig path (see below)
set -euo pipefail

# Directory containing this script; repository-relative paths are built on it.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Scratch BuildKit registry configuration; removed again on exit (see trap).
BUILDX_CONFIG="/tmp/buildx-config.toml"

# Precedence: KUBECONFIG_PATH, then TF_VAR_kubeconfig_path, then the default.
KUBECONFIG_PATH="${KUBECONFIG_PATH:-${TF_VAR_kubeconfig_path:-/home/jv/.kube/config}}"

# Remove the scratch BuildKit configuration on every exit path.
trap 'rm -f "${BUILDX_CONFIG}"' EXIT

# Refuse to continue unless the script runs on a Debian Linux host.
# Arguments: $1 - sub-command name, used only in the refusal message
# Outputs:   refusal message on stderr
# Returns:   0 on Debian; otherwise exits the shell with status 1
require_debian_server() {
  local command_name="$1"
  local os_id=""

  if [[ "$(uname -s)" != "Linux" ]]; then
    echo "Refusing to run '${command_name}' from this machine. Run it on the Debian homelab server." >&2
    exit 1
  fi

  # ID may be quoted in os-release; strip the quotes before comparing.
  if [[ -r /etc/os-release ]]; then
    os_id="$(awk -F= '$1 == "ID" {gsub(/"/, "", $2); print $2; exit}' /etc/os-release)"
  fi

  if [[ "${os_id}" != "debian" ]]; then
    echo "Refusing to run '${command_name}' on ${os_id:-unknown OS}. Run it on the Debian homelab server." >&2
    exit 1
  fi
}

# Initialise and apply one OpenTofu stack without prompting.
# Globals:   REPO_ROOT (read)
# Arguments: $1 - stack directory relative to REPO_ROOT
# Returns:   status of the failing tofu command, or 0
run_tofu_stack() {
  local stack="$1"
  local stack_dir="${REPO_ROOT}/${stack}"

  tofu -chdir="${stack_dir}" init
  tofu -chdir="${stack_dir}" apply -auto-approve
}

# Best-effort removal of Calico/CNI network interfaces and network namespaces.
# Every step tolerates failure, so a partially cleaned node is not an error.
# Returns: always 0
cleanup_calico_links() {
  local link

  # "N: caliXXXX@ifY: ..." -> " caliXXXX"; xargs trims the padding and -r
  # skips the delete entirely when no cali* interface exists.
  ip link show \
    | awk -F: '/^[0-9]+: cali/ {print $2}' \
    | cut -d@ -f1 \
    | xargs -r -n1 sudo ip link delete 2>/dev/null || true

  for link in vxlan.calico tunl0 cni0 kube-ipvs0; do
    sudo ip link delete "${link}" 2>/dev/null || true
  done

  ip netns list \
    | awk '/^(cni-|calico)/ {print $1}' \
    | xargs -r -n1 sudo ip netns delete 2>/dev/null || true
}

# Flush all rules and delete all user-defined chains in the filter, nat,
# mangle and raw tables, then clear IPVS when ipvsadm is installed.
# Failures are tolerated (best-effort cleanup).
# Returns: always 0
cleanup_iptables() {
  local table

  # "filter" is the table iptables uses when -t is omitted.
  for table in filter nat mangle raw; do
    sudo iptables -t "${table}" -F || true
    sudo iptables -t "${table}" -X || true
  done

  if command -v ipvsadm >/dev/null 2>&1; then
    sudo ipvsadm --clear || true
  fi
}

# Remove Calico runtime state under /run/calico and /var/run/calico while
# leaving anything whose path matches */cgroup* untouched.
# Failures are tolerated (best-effort cleanup).
# Returns: always 0
cleanup_calico_runtime_files() {
  local path

  for path in /run/calico /var/run/calico; do
    if sudo test -e "${path}"; then
      # -mindepth is a global find option, so it is placed before the tests;
      # it keeps ${path} itself out of the rm. cgroup paths are pruned.
      sudo find "${path}" -mindepth 1 -path '*/cgroup*' -prune -o -exec rm -rf -- {} + 2>/dev/null || true
      # Only succeeds when nothing (e.g. a pruned cgroup directory) is left.
      sudo rmdir "${path}" 2>/dev/null || true
    fi
  done
}

# Undo the homelab DNS overrides: drop the systemd-resolved drop-in, put the
# backed-up /etc/resolv.conf back when a backup exists, and restart
# systemd-resolved (restart failures are ignored).
restore_node_dns() {
  local backup="/etc/resolv.conf.homelab-k8s-backup"

  sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf

  if sudo test -e "${backup}"; then
    sudo rm -f /etc/resolv.conf
    sudo mv "${backup}" /etc/resolv.conf
  fi

  sudo systemctl restart systemd-resolved 2>/dev/null || true
}

# Best-effort unmount of everything mounted below the kubelet, containerd and
# Calico runtime directories. Each unmount tries a forced unmount first and
# falls back to a lazy one; failures are ignored.
# Returns: always 0
cleanup_mounts() {
  local mount_root
  local mountpoint # declared local so the read loops do not leak a global

  if command -v findmnt >/dev/null 2>&1; then
    # Reverse-sorted so nested mount points are unmounted before their parents.
    while IFS= read -r mountpoint; do
      sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
    done < <(
      for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do
        findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true
      done | sort -ru
    )
  fi

  # Fallback sweep over pod directories (also used when findmnt is missing).
  while IFS= read -r mountpoint; do
    sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true
  done < <(find /var/lib/kubelet/pods -mindepth 2 -maxdepth 5 -type d 2>/dev/null || true)

  # NOTE(review): this glob is expanded by the invoking user, not by root;
  # confirm it can match anything when the directory is root-only.
  sudo umount -f /var/lib/containerd/srun/* 2>/dev/null || sudo umount -l /var/lib/containerd/srun/* 2>/dev/null || true
}

# Tear down all Kubernetes state on the local node: kubeadm reset, stop kubelet
# and containerd, unmount leftovers, delete state directories, reset
# networking and DNS, then start a clean containerd again.
# Service stop/start failures are tolerated; a failing rm aborts under set -e.
cleanup_node() {
  sudo kubeadm reset --force || true
  sudo systemctl stop kubelet 2>/dev/null || true
  sudo systemctl stop containerd 2>/dev/null || true
  sudo killall containerd-shim-runc-v2 2>/dev/null || true

  cleanup_mounts

  sudo rm -rf -- \
    /etc/kubernetes/ \
    /var/lib/etcd/ \
    /var/lib/kubelet/ \
    /var/lib/cni/ \
    /etc/cni/net.d \
    /run/flannel \
    /var/lib/calico \
    /var/log/calico \
    /etc/containerd/certs.d \
    /etc/containerd/config.toml
  # The containerd globs are expanded by a root shell so they still match when
  # these directories cannot be listed by the invoking user.
  sudo sh -c 'rm -rf -- /var/lib/containerd/* /run/containerd/*'
  cleanup_calico_runtime_files
  sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam

  cleanup_iptables
  cleanup_calico_links
  restore_node_dns

  # certs.d was deleted above; recreate it empty.
  sudo mkdir -p /etc/containerd/certs.d
  sudo systemctl reset-failed kubelet containerd 2>/dev/null || true
  sudo systemctl start containerd 2>/dev/null || true
}

# Print the registry host[:port] of the php-website image referenced in
# apps/website/web-app.yaml (everything before the first "/").
# Globals:   REPO_ROOT (read)
# Outputs:   endpoint on stdout; error message on stderr
# Returns:   0 on success; exits with status 1 when no usable image is found
website_registry_endpoint() {
  local image

  # Surrounding quotes around the YAML value are stripped so they do not end
  # up inside the endpoint.
  image="$(awk '$1 == "image:" && $2 ~ /php-website/ {gsub(/^["\047]|["\047]$/, "", $2); print $2; exit}' "${REPO_ROOT}/apps/website/web-app.yaml")"
  if [[ -z "${image}" || "${image}" != */* ]]; then
    echo "Could not determine website registry endpoint from apps/website/web-app.yaml" >&2
    exit 1
  fi

  printf '%s\n' "${image%%/*}"
}

# Print the registry host[:port] of the demos-static image referenced in
# apps/demos-static/web-app.yaml (everything before the first "/").
# Globals:   REPO_ROOT (read)
# Outputs:   endpoint on stdout; error message on stderr
# Returns:   0 on success; exits with status 1 when no usable image is found
demos_registry_endpoint() {
  local image

  # Surrounding quotes around the YAML value are stripped so they do not end
  # up inside the endpoint.
  image="$(awk '$1 == "image:" && $2 ~ /demos-static/ {gsub(/^["\047]|["\047]$/, "", $2); print $2; exit}' "${REPO_ROOT}/apps/demos-static/web-app.yaml")"
  if [[ -z "${image}" || "${image}" != */* ]]; then
    echo "Could not determine demos registry endpoint from apps/demos-static/web-app.yaml" >&2
    exit 1
  fi

  printf '%s\n' "${image%%/*}"
}

# Print a SHA-256 fingerprint of every file under apps/website (paths are
# relative to REPO_ROOT, so the value does not depend on where the repo lives).
# Globals:   REPO_ROOT (read)
# Outputs:   64-character hex digest on stdout
website_source_hash() {
  # Subshell keeps the caller's working directory untouched.
  (
    cd "${REPO_ROOT}"
    # LC_ALL=C gives a byte-wise file order, so the digest is identical
    # regardless of the locale the script happens to run under.
    find apps/website -type f -print0 \
      | LC_ALL=C sort -z \
      | xargs -0 sha256sum \
      | sha256sum \
      | awk '{print $1}'
  )
}

# Print a SHA-256 fingerprint of every file under apps/demos-static (paths are
# relative to REPO_ROOT, so the value does not depend on where the repo lives).
# Globals:   REPO_ROOT (read)
# Outputs:   64-character hex digest on stdout
demos_source_hash() {
  # Subshell keeps the caller's working directory untouched.
  (
    cd "${REPO_ROOT}"
    # LC_ALL=C gives a byte-wise file order, so the digest is identical
    # regardless of the locale the script happens to run under.
    find apps/demos-static -type f -print0 \
      | LC_ALL=C sort -z \
      | xargs -0 sha256sum \
      | sha256sum \
      | awk '{print $1}'
  )
}

# Check over plain HTTP whether a manifest exists in the registry.
# Arguments: $1 - registry host[:port]
#            $2 - repository name
#            $3 - tag
# Returns:   0 when the manifest request succeeds; non-zero when it fails or
#            when curl is not installed
registry_image_exists() {
  local registry_endpoint="$1"
  local repository="$2"
  local tag="$3"
  local accept_header

  if ! command -v curl >/dev/null 2>&1; then
    return 1
  fi

  # Accept both OCI and Docker manifest/index media types.
  accept_header="application/vnd.oci.image.index.v1+json, application/vnd.oci.image.manifest.v1+json, application/vnd.docker.distribution.manifest.list.v2+json, application/vnd.docker.distribution.manifest.v2+json"
  curl -fsS \
    -H "Accept: ${accept_header}" \
    "http://${registry_endpoint}/v2/${repository}/manifests/${tag}" >/dev/null
}

# Print the value stored for a key in a key=value state file.
# Arguments: $1 - state file path
#            $2 - key to look up
# Outputs:   everything after the first "=" of the first matching line;
#            nothing when the key or the file is missing
# Returns:   always 0
image_state_value() {
  local state_file="$1"
  local key="$2"

  awk -F= -v key="${key}" '$1 == key {print substr($0, index($0, "=") + 1); exit}' "${state_file}" 2>/dev/null || true
}

# Decide whether the website image can be reused instead of rebuilt.
# Arguments: $1 - state file written after the last successful build
#            $2 - current source hash
#            $3 - target platforms
#            $4 - full image reference
#            $5 - registry host[:port]
# Returns:   0 only when the state file matches all three recorded values AND
#            php-website:latest exists in the registry; non-zero otherwise
website_image_is_current() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"
  local registry_endpoint="$5"
  local saved_hash
  local saved_platforms
  local saved_image

  [[ -f "${state_file}" ]] || return 1

  saved_hash="$(image_state_value "${state_file}" source_hash)"
  saved_platforms="$(image_state_value "${state_file}" platforms)"
  saved_image="$(image_state_value "${state_file}" image)"

  [[ "${saved_hash}" == "${source_hash}" ]] || return 1
  [[ "${saved_platforms}" == "${platforms}" ]] || return 1
  [[ "${saved_image}" == "${image_ref}" ]] || return 1

  # The state file alone is not trusted: the registry must still hold the image.
  registry_image_exists "${registry_endpoint}" php-website latest
}

# Decide whether the demos image can be reused instead of rebuilt.
# Arguments: $1 - state file written after the last successful build
#            $2 - current source hash
#            $3 - target platforms
#            $4 - full image reference
#            $5 - registry host[:port]
# Returns:   0 only when the state file matches all three recorded values AND
#            demos-static:latest exists in the registry; non-zero otherwise
demos_image_is_current() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"
  local registry_endpoint="$5"
  local saved_hash
  local saved_platforms
  local saved_image

  [[ -f "${state_file}" ]] || return 1

  saved_hash="$(image_state_value "${state_file}" source_hash)"
  saved_platforms="$(image_state_value "${state_file}" platforms)"
  saved_image="$(image_state_value "${state_file}" image)"

  [[ "${saved_hash}" == "${source_hash}" ]] || return 1
  [[ "${saved_platforms}" == "${platforms}" ]] || return 1
  [[ "${saved_image}" == "${image_ref}" ]] || return 1

  # The state file alone is not trusted: the registry must still hold the image.
  registry_image_exists "${registry_endpoint}" demos-static latest
}

# Record what the website image was built from, as three key=value lines
# (source_hash, platforms, image). The parent directory is created on demand
# and any previous state file is overwritten.
# Arguments: $1 - state file path
#            $2 - source hash
#            $3 - platforms
#            $4 - image reference
write_website_image_state() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"

  mkdir -p "$(dirname "${state_file}")"
  printf 'source_hash=%s\nplatforms=%s\nimage=%s\n' \
    "${source_hash}" "${platforms}" "${image_ref}" > "${state_file}"
}

# Record what the demos image was built from, as three key=value lines
# (source_hash, platforms, image). The parent directory is created on demand
# and any previous state file is overwritten.
# Arguments: $1 - state file path
#            $2 - source hash
#            $3 - platforms
#            $4 - image reference
write_demos_image_state() {
  local state_file="$1"
  local source_hash="$2"
  local platforms="$3"
  local image_ref="$4"

  mkdir -p "$(dirname "${state_file}")"
  printf 'source_hash=%s\nplatforms=%s\nimage=%s\n' \
    "${source_hash}" "${platforms}" "${image_ref}" > "${state_file}"
}

# Print the free space, in MiB, of the filesystem that holds a path.
# Arguments: $1 - path; it does not have to exist yet
# Outputs:   the "Available" column of `df -Pm` on stdout
path_available_mb() {
  local path="$1"

  # Walk up to the nearest existing ancestor so df has something to inspect.
  while [[ ! -e "${path}" && "${path}" != "/" ]]; do
    path="$(dirname "${path}")"
  done

  df -Pm "${path}" | awk 'NR == 2 {print $4}'
}

# Print Docker's data-root directory, falling back to /var/lib/docker when
# `docker info` fails or reports nothing.
# Outputs: exactly one line with the directory
# Returns: always 0
docker_root_dir() {
  local root=""

  # Capture first: anything a failing `docker info` printed must be discarded,
  # otherwise it would be glued in front of the fallback path.
  root="$(docker info --format '{{.DockerRootDir}}' 2>/dev/null)" || root=""

  printf '%s\n' "${root:-/var/lib/docker}"
}

# Free Docker disk space: drop the lab buildx builder and its container, then
# prune the build cache and all unused Docker data. Every step is best-effort.
# Returns: always 0
prune_unused_docker_build_data() {
  docker buildx rm lab-builder 2>/dev/null || true
  docker rm -f buildx_buildkit_lab-builder0 2>/dev/null || true
  docker builder prune -af 2>/dev/null || true
  docker system prune -af 2>/dev/null || true
}

# Make sure Docker's data root has enough free space for a build, pruning
# unused Docker data once if it does not.
# Globals:   DOCKER_BUILD_MIN_FREE_MB (read; MiB threshold, default 4096)
# Outputs:   progress on stdout; failure explanation on stderr
# Returns:   0 when enough space is available; exits with status 1 otherwise
ensure_docker_build_space() {
  local docker_root
  local free_mb
  local min_free_mb="${DOCKER_BUILD_MIN_FREE_MB:-4096}"

  # The threshold is used in arithmetic below; reject anything but digits so
  # an arbitrary string is never evaluated as an expression.
  if [[ ! "${min_free_mb}" =~ ^[0-9]+$ ]]; then
    echo "DOCKER_BUILD_MIN_FREE_MB must be a non-negative integer (got '${min_free_mb}')." >&2
    exit 1
  fi

  docker_root="$(docker_root_dir)"
  free_mb="$(path_available_mb "${docker_root}")"

  if (( free_mb >= min_free_mb )); then
    return 0
  fi

  echo "Docker data root ${docker_root} has ${free_mb}MiB free; pruning unused Docker build data..."
  prune_unused_docker_build_data
  free_mb="$(path_available_mb "${docker_root}")"

  if (( free_mb < min_free_mb )); then
    echo "Docker data root ${docker_root} still has only ${free_mb}MiB free after cleanup." >&2
    echo "Free space there or move Docker's data-root to a larger filesystem such as /home before building." >&2
    echo "Override the threshold with DOCKER_BUILD_MIN_FREE_MB if this host can build with less space." >&2
    exit 1
  fi
}

# (Re)create the "lab-builder" buildx builder with plain-HTTP access to the
# lab registry and to the two loopback registry addresses.
# Globals:   BUILDX_CONFIG (read; the BuildKit config file is written there)
# Arguments: $1 - registry host[:port]
prepare_buildx_builder() {
  local registry_endpoint="$1"
  local endpoint
  local -a endpoints=("${registry_endpoint}")

  # Register QEMU binfmt handlers for cross-platform builds.
  docker run --rm --privileged multiarch/qemu-user-static --reset -p yes

  # Each registry gets exactly one table: the loopback entries are skipped
  # when the endpoint itself is one of them, avoiding a duplicate TOML table.
  for endpoint in "127.0.0.1:30500" "localhost:30500"; do
    if [[ "${endpoint}" != "${registry_endpoint}" ]]; then
      endpoints+=("${endpoint}")
    fi
  done

  for endpoint in "${endpoints[@]}"; do
    printf '[registry."%s"]\nhttp = true\ninsecure = true\n' "${endpoint}"
  done > "${BUILDX_CONFIG}"

  docker buildx rm lab-builder 2>/dev/null || true
  docker buildx create --name lab-builder --driver docker-container --driver-opt network=host --config "${BUILDX_CONFIG}" --use
  docker buildx inspect --bootstrap
}

# Print diagnostics for an Argo CD application: its manifest and description,
# the argocd pods, and the last 120 log lines of the repo server and the
# application controller. Every command is best-effort.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - Argo CD application name
# Returns:   always 0
dump_argocd_debug() {
  local app="$1"
  local -a argocd=(kubectl --kubeconfig "${KUBECONFIG}" -n argocd)

  "${argocd[@]}" get application "${app}" -o yaml || true
  "${argocd[@]}" describe application "${app}" || true
  "${argocd[@]}" get pods -o wide || true
  "${argocd[@]}" logs deployment/argocd-repo-server --tail=120 || true
  "${argocd[@]}" logs statefulset/argocd-application-controller --tail=120 || true
}

# Print diagnostics for a namespace: all resources, PVCs, pod descriptions and
# the 80 most recent events. Every command is best-effort.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - namespace
# Returns:   always 0
dump_namespace_debug() {
  local namespace="$1"
  local -a in_ns=(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}")

  "${in_ns[@]}" get all -o wide || true
  "${in_ns[@]}" get pvc -o wide || true
  "${in_ns[@]}" describe pods || true
  "${in_ns[@]}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true
}

# Poll every 5 seconds until a namespace exists.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - namespace to wait for
#            $2 - Argo CD application expected to create it (for diagnostics)
#            $3 - timeout in seconds
# Returns:   0 once the namespace exists; on timeout prints diagnostics and
#            exits with status 1
wait_for_namespace() {
  local namespace="$1"
  local app="$2"
  local timeout_seconds="$3"
  local elapsed=0

  until kubectl --kubeconfig "${KUBECONFIG}" get namespace "${namespace}" >/dev/null 2>&1; do
    if (( elapsed >= timeout_seconds )); then
      echo "Timed out waiting for namespace ${namespace} from Argo CD app ${app}" >&2
      dump_argocd_debug "${app}"
      exit 1
    fi
    sleep 5
    elapsed=$(( elapsed + 5 ))
  done
}

# Poll every 5 seconds until a namespaced resource exists.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - namespace
#            $2 - resource kind (e.g. deployment)
#            $3 - resource name
#            $4 - Argo CD application expected to create it (for diagnostics)
#            $5 - timeout in seconds
# Returns:   0 once the resource exists; on timeout prints diagnostics
#            (including recent namespace events) and exits with status 1
wait_for_namespaced_resource() {
  local namespace="$1"
  local kind="$2"
  local name="$3"
  local app="$4"
  local timeout_seconds="$5"
  local elapsed=0

  until kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get "${kind}/${name}" >/dev/null 2>&1; do
    if (( elapsed >= timeout_seconds )); then
      echo "Timed out waiting for ${kind}/${name} in namespace ${namespace} from Argo CD app ${app}" >&2
      dump_argocd_debug "${app}"
      kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true
      exit 1
    fi
    sleep 5
    elapsed=$(( elapsed + 5 ))
  done
}

# Poll every 5 seconds until a deployment reports at least as many ready
# replicas as its spec asks for.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - namespace
#            $2 - deployment name
#            $3 - Argo CD application that owns it (for diagnostics)
#            $4 - timeout in seconds
# Returns:   0 once enough replicas are ready; on timeout prints diagnostics
#            and exits with status 1
wait_for_deployment_ready() {
  local namespace="$1"
  local deployment="$2"
  local app="$3"
  local timeout_seconds="$4"
  local desired_replicas
  local ready_replicas
  local elapsed=0

  # The target is read once up front; one replica is assumed when the spec
  # cannot be read.
  desired_replicas="$(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
  desired_replicas="${desired_replicas:-1}"

  while true; do
    # An empty readyReplicas (or a failed query) counts as zero.
    ready_replicas="$(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null || true)"
    if (( ${ready_replicas:-0} >= desired_replicas )); then
      return 0
    fi

    if (( elapsed >= timeout_seconds )); then
      echo "Timed out waiting for deployment/${deployment} in namespace ${namespace} to have ${desired_replicas} ready replicas" >&2
      dump_argocd_debug "${app}"
      dump_namespace_debug "${namespace}"
      exit 1
    fi
    sleep 5
    elapsed=$(( elapsed + 5 ))
  done
}

# Apply the Gitea manifests directly with kubectl (namespace, storage,
# service, deployment, in that order) and wait up to 300 seconds each for the
# namespace, the deployment object and its ready replicas.
# Globals:   KUBECONFIG, REPO_ROOT (read)
apply_gitea_bootstrap_manifests() {
  local manifest

  for manifest in namespace storage service deployment; do
    kubectl --kubeconfig "${KUBECONFIG}" apply -f "${REPO_ROOT}/apps/gitea/${manifest}.yaml"
  done

  wait_for_namespace gitea-system gitea 300
  wait_for_namespaced_resource gitea-system deployment gitea gitea 300
  wait_for_deployment_ready gitea-system gitea gitea 300
}

# Install the Gitea backup script plus a systemd service/timer pair that runs
# it daily, then enable and start the timer.
# Globals:   KUBECONFIG (read; baked into the generated script as the default
#            kubeconfig path)
# Writes:    /usr/local/sbin/homelab-gitea-backup.sh
#            /etc/systemd/system/homelab-gitea-backup.service
#            /etc/systemd/system/homelab-gitea-backup.timer
install_gitea_backup_timer() {
  local backup_script="/usr/local/sbin/homelab-gitea-backup.sh"

  # Fail with a clear message before touching any file.
  : "${KUBECONFIG:?KUBECONFIG must be set before installing the Gitea backup timer}"

  # Unquoted heredoc: ${KUBECONFIG} is expanded now; every \$ is written out
  # as a literal $ and evaluated only when the generated script runs.
  sudo tee "${backup_script}" >/dev/null <<BACKUP_SCRIPT_EOT
#!/usr/bin/env bash
set -euo pipefail

KUBECONFIG_PATH="\${KUBECONFIG_PATH:-${KUBECONFIG}}"
GITEA_NAMESPACE="\${GITEA_NAMESPACE:-gitea-system}"
GITEA_SELECTOR="\${GITEA_SELECTOR:-app=gitea}"
GITEA_CONTAINER="\${GITEA_CONTAINER:-gitea}"
GITEA_BACKUP_DIR="\${GITEA_BACKUP_DIR:-/var/backups/homelab/gitea}"
GITEA_BACKUP_RETENTION_DAYS="\${GITEA_BACKUP_RETENTION_DAYS:-30}"
REMOTE_ARCHIVE="/tmp/homelab-gitea-dump.zip"

if [[ ! -s "\${KUBECONFIG_PATH}" ]]; then
  echo "Skipping Gitea backup: kubeconfig \${KUBECONFIG_PATH} does not exist."
  exit 0
fi

if ! command -v kubectl >/dev/null 2>&1; then
  echo "kubectl is required for Gitea backups." >&2
  exit 1
fi

pod="\$(kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" get pods \
  -l "\${GITEA_SELECTOR}" \
  --field-selector=status.phase=Running \
  -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"

if [[ -z "\${pod}" ]]; then
  echo "Skipping Gitea backup: no running Gitea pod found."
  exit 0
fi

timestamp="\$(date -u +%Y%m%dT%H%M%SZ)"
tmp_archive="\$(mktemp "/tmp/gitea-\${timestamp}.XXXXXX.zip")"
backup_archive="\${GITEA_BACKUP_DIR}/gitea-\${timestamp}.zip"

cleanup() {
  rm -f "\${tmp_archive}"
  kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- rm -f "\${REMOTE_ARCHIVE}" >/dev/null 2>&1 || true
}
trap cleanup EXIT

kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- rm -f "\${REMOTE_ARCHIVE}" >/dev/null 2>&1 || true
kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- \
  sh -c 'mkdir -p /data/git/repositories && chown git:git /data/git /data/git/repositories'
kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" exec "\${pod}" -c "\${GITEA_CONTAINER}" -- \
  su-exec git gitea dump -c /data/gitea/conf/app.ini --file "\${REMOTE_ARCHIVE}"
kubectl --kubeconfig "\${KUBECONFIG_PATH}" -n "\${GITEA_NAMESPACE}" cp -c "\${GITEA_CONTAINER}" \
  "\${GITEA_NAMESPACE}/\${pod}:\${REMOTE_ARCHIVE}" "\${tmp_archive}"

sudo mkdir -p "\${GITEA_BACKUP_DIR}"
sudo install -m 0640 -o root -g root "\${tmp_archive}" "\${backup_archive}"
sudo find "\${GITEA_BACKUP_DIR}" -type f -name 'gitea-*.zip' -mtime +"\${GITEA_BACKUP_RETENTION_DAYS}" -delete

echo "Created \${backup_archive}"
BACKUP_SCRIPT_EOT
  sudo chmod 0755 "${backup_script}"

  # Quoted heredocs: the unit files are written verbatim.
  sudo tee /etc/systemd/system/homelab-gitea-backup.service >/dev/null <<'SERVICE_EOT'
[Unit]
Description=Back up in-cluster Gitea to Debian host storage
After=network-online.target
Wants=network-online.target

[Service]
Type=oneshot
ExecStart=/usr/local/sbin/homelab-gitea-backup.sh
SERVICE_EOT

  sudo tee /etc/systemd/system/homelab-gitea-backup.timer >/dev/null <<'TIMER_EOT'
[Unit]
Description=Run daily Homelab Gitea backups

[Timer]
OnCalendar=*-*-* 02:35:00
RandomizedDelaySec=20m
Persistent=true

[Install]
WantedBy=timers.target
TIMER_EOT

  sudo systemctl daemon-reload
  sudo systemctl enable --now homelab-gitea-backup.timer >/dev/null
}

# Sub-command "backup-gitea": (re)install the backup script and timer, then
# run one backup immediately as root.
# Globals:   KUBECONFIG_PATH (read), KUBECONFIG (exported)
backup_gitea() {
  require_debian_server "backup-gitea"

  # install_gitea_backup_timer reads KUBECONFIG when generating the script.
  export KUBECONFIG="${KUBECONFIG_PATH}"
  install_gitea_backup_timer
  sudo /usr/local/sbin/homelab-gitea-backup.sh
}

# Sub-command "install-gitea-runner": install act_runner, register it with
# Gitea unless a registration already exists, and run it as a systemd service.
# Globals (all optional, read):
#   GITEA_RUNNER_HOME, GITEA_RUNNER_INSTANCE_URL, GITEA_RUNNER_LABELS,
#   GITEA_RUNNER_NAME, GITEA_RUNNER_REGISTRATION_TOKEN, GITEA_RUNNER_USER,
#   GITEA_ACT_RUNNER_VERSION
# Arguments: $1 - registration token, used when the environment variable is
#                 unset or empty (optional)
# Returns:   status of the final `systemctl status`; exits with status 1 on an
#            unsupported architecture, failed download or missing token
install_gitea_runner() {
  local runner_arch
  local runner_home="${GITEA_RUNNER_HOME:-/home/jv/.local/share/gitea-runner/my-homelab-configs}"
  local runner_instance="${GITEA_RUNNER_INSTANCE_URL:-https://lab2025.duckdns.org/git/}"
  local runner_labels="${GITEA_RUNNER_LABELS:-homelab-debian:host}"
  local runner_name="${GITEA_RUNNER_NAME:-homelab-debian-my-homelab-configs}"
  local runner_token="${GITEA_RUNNER_REGISTRATION_TOKEN:-${1:-}}"
  local runner_user="${GITEA_RUNNER_USER:-jv}"
  local runner_version="${GITEA_ACT_RUNNER_VERSION:-0.2.11}"
  local deb_arch
  local download_tmp
  local package
  local -a missing_packages=()

  require_debian_server "install-gitea-runner"

  deb_arch="$(dpkg --print-architecture)"
  case "${deb_arch}" in
    amd64)
      runner_arch="linux-amd64"
      ;;
    arm64)
      runner_arch="linux-arm64"
      ;;
    *)
      echo "Unsupported Debian architecture: ${deb_arch}" >&2
      exit 1
      ;;
  esac

  for package in ca-certificates curl git python3; do
    if ! dpkg-query -W -f='${Status}' "${package}" 2>/dev/null | grep -q "install ok installed"; then
      missing_packages+=("${package}")
    fi
  done
  if (( ${#missing_packages[@]} > 0 )); then
    sudo apt-get update
    sudo apt-get install -y --no-install-recommends "${missing_packages[@]}"
  fi

  # Download to a scratch file and put it in place with install(1) rather
  # than writing over /usr/local/bin/act_runner directly: the binary may be
  # executing (the service below runs it), and a failed download must not
  # leave a truncated executable behind.
  download_tmp="$(mktemp)"
  if ! curl -fsSL \
    -o "${download_tmp}" \
    "https://gitea.com/gitea/act_runner/releases/download/v${runner_version}/act_runner-${runner_version}-${runner_arch}"; then
    rm -f -- "${download_tmp}"
    echo "Failed to download act_runner v${runner_version} for ${runner_arch}" >&2
    exit 1
  fi
  if ! sudo install -m 0755 -o root -g root "${download_tmp}" /usr/local/bin/act_runner; then
    rm -f -- "${download_tmp}"
    exit 1
  fi
  rm -f -- "${download_tmp}"

  sudo -u "${runner_user}" mkdir -p "${runner_home}"

  if [[ ! -f "${runner_home}/.runner" ]]; then
    if [[ -z "${runner_token}" ]]; then
      echo "Set GITEA_RUNNER_REGISTRATION_TOKEN to the repository-level runner token from Gitea." >&2
      exit 1
    fi

    # Values travel as environment variables so the inner command line needs
    # no nested quoting.
    # NOTE(review): the token is still part of the env/act_runner argument
    # lists while this runs - confirm that is acceptable on this host.
    sudo -u "${runner_user}" env \
      HOME="/home/${runner_user}" \
      GITEA_RUNNER_HOME="${runner_home}" \
      GITEA_RUNNER_INSTANCE_URL="${runner_instance}" \
      GITEA_RUNNER_REGISTRATION_TOKEN="${runner_token}" \
      GITEA_RUNNER_NAME="${runner_name}" \
      GITEA_RUNNER_LABELS="${runner_labels}" \
      bash -lc 'cd "${GITEA_RUNNER_HOME}" && /usr/local/bin/act_runner register --no-interactive --instance "${GITEA_RUNNER_INSTANCE_URL}" --token "${GITEA_RUNNER_REGISTRATION_TOKEN}" --name "${GITEA_RUNNER_NAME}" --labels "${GITEA_RUNNER_LABELS}"'
  else
    echo "Existing runner registration found at ${runner_home}/.runner; keeping it."
  fi

  # Unquoted heredoc: user and home are substituted into the unit file.
  sudo tee /etc/systemd/system/homelab-gitea-runner.service >/dev/null <<SERVICE_EOT
[Unit]
Description=Homelab Gitea Actions runner for my-homelab-configs
After=network-online.target docker.service
Wants=network-online.target

[Service]
Type=simple
User=${runner_user}
Group=${runner_user}
WorkingDirectory=${runner_home}
Environment=HOME=/home/${runner_user}
Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ExecStart=/usr/local/bin/act_runner daemon
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target
SERVICE_EOT

  sudo systemctl daemon-reload
  sudo systemctl enable --now homelab-gitea-runner.service >/dev/null
  sudo systemctl status homelab-gitea-runner.service --no-pager -l
}

# Delete every pod matching a label selector and wait (up to 120s) for the
# deletion to finish, so the owning controller recreates them.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - namespace
#            $2 - label selector
#            $3 - Argo CD application name (for diagnostics)
# Returns:   0 on success; on failure prints diagnostics and exits with
#            status 1
recreate_pods_for_selector() {
  local namespace="$1"
  local selector="$2"
  local app="$3"

  if kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" delete pod -l "${selector}" --ignore-not-found --wait=true --timeout=120s; then
    return 0
  fi

  echo "Failed to recreate pods matching ${selector} in namespace ${namespace}" >&2
  dump_argocd_debug "${app}"
  dump_namespace_debug "${namespace}"
  exit 1
}

# Request a hard refresh of an Argo CD application by setting the
# argocd.argoproj.io/refresh=hard annotation on it.
# Globals:   KUBECONFIG (read)
# Arguments: $1 - Argo CD application name
# Returns:   status of kubectl patch
refresh_argocd_application() {
  local app="$1"
  local patch='{"metadata":{"annotations":{"argocd.argoproj.io/refresh":"hard"}}}'

  kubectl --kubeconfig "${KUBECONFIG}" patch application "${app}" -n argocd --type merge -p "${patch}" >/dev/null
}

# Build one image with the active buildx builder and push it.
# Arguments: $1 - target platforms
#            $2 - image label in key=value form
#            $3 - image reference to tag and push
#            $4 - build context directory (must contain the Dockerfile)
_lab_build_and_push_image() {
  local platforms="$1"
  local label="$2"
  local image_ref="$3"
  local context_dir="$4"

  docker buildx build \
    --network host \
    --platform "${platforms}" \
    --provenance=false \
    --sbom=false \
    --label "${label}" \
    -t "${image_ref}" \
    -f "${context_dir}/Dockerfile" \
    "${context_dir}/" \
    --push
}

# Sub-command "up": provision the cluster and platform stacks, bootstrap
# Gitea, build and push the website and demos images when their sources
# changed, roll the affected pods, and finally apply the edge stack.
# Globals (read):     REPO_ROOT, KUBECONFIG_PATH, DEMOS_IMAGE_PLATFORMS,
#                     WEBSITE_IMAGE_PLATFORMS
# Globals (exported): TF_VAR_registry_endpoint, TF_VAR_kubeconfig_path,
#                     KUBECONFIG
up() {
  # Several locals share a name with the function that computes them; bash
  # keeps variables and functions in separate namespaces.
  local buildx_builder_ready=false
  local demos_image_built=false
  local demos_image_ref
  local demos_image_state_file
  local demos_platforms
  local demos_registry_endpoint
  local demos_source_hash
  local registry_endpoint
  local website_image_built=false
  local website_image_ref
  local website_image_state_file
  local website_platforms
  local website_source_hash

  require_debian_server "up"

  registry_endpoint="$(website_registry_endpoint)"
  demos_registry_endpoint="$(demos_registry_endpoint)"
  demos_image_ref="${registry_endpoint}/demos-static:latest"
  demos_image_state_file="${REPO_ROOT}/.lab/demos-static-image.state"
  demos_platforms="${DEMOS_IMAGE_PLATFORMS:-linux/arm64}"
  demos_source_hash="$(demos_source_hash)"
  website_image_ref="${registry_endpoint}/php-website:latest"
  website_image_state_file="${REPO_ROOT}/.lab/php-website-image.state"
  website_platforms="${WEBSITE_IMAGE_PLATFORMS:-linux/arm64}"
  website_source_hash="$(website_source_hash)"
  export TF_VAR_registry_endpoint="${TF_VAR_registry_endpoint:-${registry_endpoint}}"
  export TF_VAR_kubeconfig_path="${TF_VAR_kubeconfig_path:-${KUBECONFIG_PATH}}"
  export KUBECONFIG="${TF_VAR_kubeconfig_path}"

  # Both manifests and any TF_VAR override must name the same registry.
  if [[ "${TF_VAR_registry_endpoint}" != "${registry_endpoint}" ]]; then
    echo "TF_VAR_registry_endpoint must match apps/website/web-app.yaml (${registry_endpoint})" >&2
    exit 1
  fi

  if [[ "${demos_registry_endpoint}" != "${registry_endpoint}" ]]; then
    echo "apps/demos-static/web-app.yaml registry endpoint (${demos_registry_endpoint}) must match apps/website/web-app.yaml (${registry_endpoint})" >&2
    exit 1
  fi

  echo "Deploying the homelab infrastructure..."

  run_tofu_stack "bootstrap/cluster"
  run_tofu_stack "bootstrap/platform"
  apply_gitea_bootstrap_manifests
  install_gitea_backup_timer
  run_tofu_stack "bootstrap/apps"

  refresh_argocd_application container-registry
  refresh_argocd_application demos-static
  refresh_argocd_application gitea
  refresh_argocd_application website-production

  # The registry must be serving before images can be checked or pushed.
  wait_for_namespace container-registry container-registry 300
  wait_for_namespaced_resource container-registry deployment local-registry container-registry 300
  wait_for_deployment_ready container-registry local-registry container-registry 300

  if website_image_is_current "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}" "${registry_endpoint}"; then
    echo "Website image ${website_image_ref} is already current (${website_source_hash}); skipping build."
  else
    echo "Building website image ${website_image_ref} for ${website_platforms} (${website_source_hash})..."
    ensure_docker_build_space
    # The builder is prepared at most once per run, and only when needed.
    if [[ "${buildx_builder_ready}" != "true" ]]; then
      prepare_buildx_builder "${registry_endpoint}"
      buildx_builder_ready=true
    fi

    _lab_build_and_push_image \
      "${website_platforms}" \
      "dev.homelab.website.source-hash=${website_source_hash}" \
      "${website_image_ref}" \
      "${REPO_ROOT}/apps/website"
    website_image_built=true
  fi

  if demos_image_is_current "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}" "${registry_endpoint}"; then
    echo "Demos image ${demos_image_ref} is already current (${demos_source_hash}); skipping build."
  else
    echo "Building demos image ${demos_image_ref} for ${demos_platforms} (${demos_source_hash})..."
    ensure_docker_build_space
    if [[ "${buildx_builder_ready}" != "true" ]]; then
      prepare_buildx_builder "${registry_endpoint}"
      buildx_builder_ready=true
    fi

    _lab_build_and_push_image \
      "${demos_platforms}" \
      "dev.homelab.demos.source-hash=${demos_source_hash}" \
      "${demos_image_ref}" \
      "${REPO_ROOT}/apps/demos-static"
    demos_image_built=true
  fi

  refresh_argocd_application website-production
  wait_for_namespace website-production website-production 300
  wait_for_namespaced_resource website-production deployment php-website-deployment website-production 300
  if [[ "${website_image_built}" == "true" ]]; then
    recreate_pods_for_selector website-production app=php-website website-production
  else
    echo "Skipping website pod restart because the image did not change."
  fi
  wait_for_deployment_ready website-production php-website-deployment website-production 300
  # State is recorded only after the new image is confirmed running.
  if [[ "${website_image_built}" == "true" ]]; then
    write_website_image_state "${website_image_state_file}" "${website_source_hash}" "${website_platforms}" "${website_image_ref}"
  fi

  refresh_argocd_application demos-static
  wait_for_namespace demos-static demos-static 300
  wait_for_namespaced_resource demos-static deployment demos-static demos-static 300
  if [[ "${demos_image_built}" == "true" ]]; then
    recreate_pods_for_selector demos-static app=demos-static demos-static
  else
    echo "Skipping demos pod restart because the image did not change."
  fi
  wait_for_deployment_ready demos-static demos-static demos-static 300
  if [[ "${demos_image_built}" == "true" ]]; then
    write_demos_image_state "${demos_image_state_file}" "${demos_source_hash}" "${demos_platforms}" "${demos_image_ref}"
  fi

  run_tofu_stack "bootstrap/edge"

  echo "Deployment successfully completed."
}

# Sub-command "nuke": destroy all Kubernetes state on this node and on every
# worker, remove the buildx builder, and delete the OpenTofu state of all four
# bootstrap stacks.
# Globals:   WORKER_SSH_TARGETS (read; space-separated ssh targets; set it to
#            an empty string for "no workers"), KUBECONFIG_PATH,
#            BUILDX_CONFIG, REPO_ROOT (read)
# Returns:   0 on success; exits with status 1 when a worker cleanup fails,
#            in which case the OpenTofu state is left in place
nuke() {
  local worker_ssh_targets
  local -a worker_targets=()
  local target
  local remote_script
  local stack
  local stack_dir
  # Functions shipped to the workers; cleanup_node is the entry point and
  # calls the others.
  local -a remote_functions=(
    cleanup_calico_links
    cleanup_iptables
    cleanup_calico_runtime_files
    restore_node_dns
    cleanup_mounts
    cleanup_node
  )

  require_debian_server "nuke"

  echo "Brutally nuking the homelab infrastructure..."
  # "-" rather than ":-" so an explicitly empty variable disables workers.
  worker_ssh_targets="${WORKER_SSH_TARGETS-jv@192.168.100.89}"
  read -r -a worker_targets <<< "${worker_ssh_targets}"

  echo "--> Terminating local OpenTofu tasks..."
  killall tofu terraform 2>/dev/null || true

  echo "--> Eviscerating local Kubernetes components..."
  cleanup_node
  sudo rm -f "${KUBECONFIG_PATH}"

  if (( ${#worker_targets[@]} > 0 )); then
    # The workers run the very same function definitions as this host, so
    # the local and remote cleanup cannot drift apart.
    if ! remote_script="$(declare -f "${remote_functions[@]}")"; then
      echo "Cannot build the remote cleanup script: a cleanup function is not defined." >&2
      exit 1
    fi
    remote_script="$(printf 'set -euo pipefail\n%s\ncleanup_node\n' "${remote_script}")"

    for target in "${worker_targets[@]}"; do
      echo "--> Eviscerating remote Kubernetes components (${target})..."
      if ! ssh -o ConnectTimeout=5 "${target}" "bash -s" <<< "${remote_script}"; then
        echo "Remote cleanup failed for ${target}; not deleting OpenTofu state." >&2
        exit 1
      fi
    done
  fi

  docker buildx rm lab-builder 2>/dev/null || true
  docker rm -f buildx_buildkit_lab-builder0 2>/dev/null || true
  rm -f "${BUILDX_CONFIG}" || true

  echo "--> Deleting OpenTofu tracking state files..."
  for stack in cluster platform apps edge; do
    # :? aborts instead of deleting under "/" should REPO_ROOT ever be empty.
    stack_dir="${REPO_ROOT:?}/bootstrap/${stack}"
    rm -rf -- "${stack_dir}"/terraform.tfstate*
    rm -f -- "${stack_dir}/.terraform.tfstate.lock.info"
    rm -rf -- "${stack_dir}/.terraform/"
  done

  echo "Destruction complete. Retained data under /var/openebs/local was left intact."
}

# Sub-command dispatch. install-gitea-runner takes the registration token as
# an optional second argument. Anything else prints usage on stderr and exits
# with status 1.
case "${1:-}" in
  up)
    up
    ;;
  backup-gitea)
    backup_gitea
    ;;
  install-gitea-runner)
    install_gitea_runner "${2:-}"
    ;;
  nuke)
    nuke
    ;;
  *)
    echo "Usage: $0 {up|backup-gitea|install-gitea-runner|nuke}" >&2
    exit 1
    ;;
esac