diff --git a/apps/website/Dockerfile b/apps/website/Dockerfile index 6f4fc6e..99bf3d4 100644 --- a/apps/website/Dockerfile +++ b/apps/website/Dockerfile @@ -16,11 +16,14 @@ RUN ln -sf /usr/bin/php82 /usr/bin/php # Alpine keeps Apache site configs here instead of a2enmod RUN sed -i 's/#LoadModule rewrite_module/LoadModule rewrite_module/' /etc/apache2/httpd.conf && \ - sed -i 's/#LoadModule headers_module/LoadModule headers_module/' /etc/apache2/httpd.conf + sed -i 's/#LoadModule headers_module/LoadModule headers_module/' /etc/apache2/httpd.conf && \ + sed -i 's/DirectoryIndex index.html/DirectoryIndex index.php index.html/' /etc/apache2/httpd.conf # Copy files directly into Alpine's default web root COPY . /var/www/localhost/htdocs/ +RUN rm -f /var/www/localhost/htdocs/index.html + # Set up the database directory permissions RUN mkdir -p /var/www/localhost/htdocs/db && \ chown -R apache:apache /var/www/localhost/htdocs/db && \ diff --git a/apps/website/web-app.yaml b/apps/website/web-app.yaml index 12b7065..0182f44 100644 --- a/apps/website/web-app.yaml +++ b/apps/website/web-app.yaml @@ -20,6 +20,8 @@ spec: labels: app: php-website spec: + nodeSelector: + kubernetes.io/hostname: raspberry affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: # requiredDuringSchedulingIgnoredDuringExecution: @@ -65,6 +67,7 @@ metadata: namespace: website-production spec: type: NodePort + externalTrafficPolicy: Local ports: - port: 80 targetPort: 80 diff --git a/bootstrap/cluster/main.tf b/bootstrap/cluster/main.tf index 3eac5fc..4595834 100644 --- a/bootstrap/cluster/main.tf +++ b/bootstrap/cluster/main.tf @@ -14,14 +14,15 @@ terraform { resource "null_resource" "kubeadm_control_plane" { triggers = { - node_name = var.control_plane_node_name - advertise_address = var.control_plane_advertise_address - pod_network_cidr = var.pod_network_cidr - kubeconfig_path = var.kubeconfig_path - kubeconfig_owner = var.kubeconfig_owner - registry_endpoint = var.registry_endpoint - node_dns_servers = join(" ", var.node_dns_servers) - persistent_volume_dirs = join(",", var.persistent_volume_dirs) + node_name = var.control_plane_node_name + advertise_address = var.control_plane_advertise_address + pod_network_cidr = var.pod_network_cidr + kubeconfig_path = var.kubeconfig_path + kubeconfig_owner = var.kubeconfig_owner + registry_endpoint = var.registry_endpoint + registry_config_version = "6" + node_dns_servers = join(" ", var.node_dns_servers) + persistent_volume_dirs = join(",", var.persistent_volume_dirs) } provisioner "local-exec" { @@ -68,6 +69,138 @@ configure_node_dns() { fi } +remove_containerd_section() { + local section="$1" + local tmp + + tmp="$(mktemp)" + sudo awk -v section="$section" ' + $0 == section { skip = 1; next } + skip && /^\[/ { skip = 0 } + !skip { print } + ' /etc/containerd/config.toml > "$tmp" + sudo mv "$tmp" /etc/containerd/config.toml +} + +ensure_containerd_registry_config_path() { + local plugin="$1" + local append_section="$2" + local tmp + + tmp="$(mktemp)" + sudo awk -v plugin="$plugin" -v append_section="$append_section" ' + function is_table(line) { + return line ~ /^[[:space:]]*\[/ + } + function is_target_registry(line) { + return is_table(line) && + index(line, plugin) > 0 && + line ~ /[.]registry[[:space:]]*\]/ + } + BEGIN { + in_target = 0 + found = 0 + wrote = 0 + } + is_target_registry($0) { + if (in_target && !wrote) { + print " config_path = \"/etc/containerd/certs.d\"" + } + in_target = 1 + found = 1 + wrote = 0 + print + next + } + in_target && is_table($0) { + if (!wrote) { + print " config_path = \"/etc/containerd/certs.d\"" + } + in_target = 0 + wrote = 0 + } + in_target && $0 ~ /^[[:space:]]*config_path[[:space:]]*=/ { + print " config_path = \"/etc/containerd/certs.d\"" + wrote = 1 + next + } + { print } + END { + if (in_target && !wrote) { + print " config_path = \"/etc/containerd/certs.d\"" + } + if (!found) { + print "" + print append_section + print " config_path = \"/etc/containerd/certs.d\"" + } + } + ' /etc/containerd/config.toml > "$tmp" + sudo mv "$tmp" /etc/containerd/config.toml +} + +containerd_config_version() { + sudo awk -F= ' + /^[[:space:]]*version[[:space:]]*=/ { + gsub(/[[:space:]]/, "", $2) + print $2 + exit + } + ' /etc/containerd/config.toml +} + +reset_containerd_registry_tables() { + local tmp + + tmp="$(mktemp)" + sudo awk ' + function is_registry_table(line) { + return line ~ /^\[plugins\./ && + line ~ /\.registry([.\]]|$)/ && + (line ~ /io[.]containerd[.]grpc[.]v1[.]cri/ || + line ~ /io[.]containerd[.]cri[.]v1[.]images/) + } + is_registry_table($0) { skip = 1; next } + skip && /^\[/ { skip = 0 } + !skip { print } + ' /etc/containerd/config.toml > "$tmp" + sudo mv "$tmp" /etc/containerd/config.toml +} + +configure_containerd_registry() { + local registry_endpoint="$1" + local config_version + + sudo mkdir -p /etc/containerd + sudo containerd config default | sudo tee /etc/containerd/config.toml >/dev/null + + sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml + config_version="$(containerd_config_version)" + if [ "$config_version" = "3" ]; then + ensure_containerd_registry_config_path "io.containerd.cri.v1.images" '[plugins."io.containerd.cri.v1.images".registry]' + else + ensure_containerd_registry_config_path "io.containerd.grpc.v1.cri" '[plugins."io.containerd.grpc.v1.cri".registry]' + fi + + sudo mkdir -p "/etc/containerd/certs.d/$registry_endpoint" + sudo tee "/etc/containerd/certs.d/$registry_endpoint/hosts.toml" >/dev/null </dev/null; then + sudo containerd config dump || true + exit 1 + fi + if ! sudo systemctl restart containerd; then + sudo systemctl status containerd --no-pager -l || true + sudo journalctl -u containerd --no-pager -n 160 || true + exit 1 + fi +} + configure_node_dns install_missing_packages open-iscsi nfs-common sudo systemctl enable --now iscsid @@ -103,21 +236,7 @@ if ! getent hosts "${self.triggers.node_name}" >/dev/null; then printf '%s %s\n' "${self.triggers.advertise_address}" "${self.triggers.node_name}" | sudo tee -a /etc/hosts >/dev/null fi -sudo mkdir -p /etc/containerd -if [ ! -f /etc/containerd/config.toml ]; then - sudo containerd config default | sudo tee /etc/containerd/config.toml >/dev/null -fi -sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml -sudo sed -i 's#config_path = ""#config_path = "/etc/containerd/certs.d"#' /etc/containerd/config.toml -sudo mkdir -p /etc/containerd/certs.d/${self.triggers.registry_endpoint} -sudo tee /etc/containerd/certs.d/${self.triggers.registry_endpoint}/hosts.toml >/dev/null < "$tmp" + sudo mv "$tmp" /etc/containerd/config.toml +} + +ensure_containerd_registry_config_path() { + local plugin="$1" + local append_section="$2" + local tmp + + tmp="$(mktemp)" + sudo awk -v plugin="$plugin" -v append_section="$append_section" ' + function is_table(line) { + return line ~ /^[[:space:]]*\[/ + } + function is_target_registry(line) { + return is_table(line) && + index(line, plugin) > 0 && + line ~ /[.]registry[[:space:]]*\]/ + } + BEGIN { + in_target = 0 + found = 0 + wrote = 0 + } + is_target_registry($0) { + if (in_target && !wrote) { + print " config_path = \"/etc/containerd/certs.d\"" + } + in_target = 1 + found = 1 + wrote = 0 + print + next + } + in_target && is_table($0) { + if (!wrote) { + print " config_path = \"/etc/containerd/certs.d\"" + } + in_target = 0 + wrote = 0 + } + in_target && $0 ~ /^[[:space:]]*config_path[[:space:]]*=/ { + print " config_path = \"/etc/containerd/certs.d\"" + wrote = 1 + next + } + { print } + END { + if (in_target && !wrote) { + print " config_path = \"/etc/containerd/certs.d\"" + } + if (!found) { + print "" + print append_section + print " config_path = \"/etc/containerd/certs.d\"" + } + } + ' /etc/containerd/config.toml > "$tmp" + sudo mv "$tmp" /etc/containerd/config.toml +} + +containerd_config_version() { + sudo awk -F= ' + /^[[:space:]]*version[[:space:]]*=/ { + gsub(/[[:space:]]/, "", $2) + print $2 + exit + } + ' /etc/containerd/config.toml +} + +reset_containerd_registry_tables() { + local tmp + + tmp="$(mktemp)" + sudo awk ' + function is_registry_table(line) { + return line ~ /^\[plugins\./ && + line ~ /\.registry([.\]]|$)/ && + (line ~ /io[.]containerd[.]grpc[.]v1[.]cri/ || + line ~ /io[.]containerd[.]cri[.]v1[.]images/) + } + is_registry_table($0) { skip = 1; next } + skip && /^\[/ { skip = 0 } + !skip { print } + ' /etc/containerd/config.toml > "$tmp" + sudo mv "$tmp" /etc/containerd/config.toml +} + +configure_containerd_registry() { + local registry_endpoint="$1" + local config_version + + sudo mkdir -p /etc/containerd + sudo containerd config default | sudo tee /etc/containerd/config.toml >/dev/null + + sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml + config_version="$(containerd_config_version)" + if [ "$config_version" = "3" ]; then + ensure_containerd_registry_config_path "io.containerd.cri.v1.images" '[plugins."io.containerd.cri.v1.images".registry]' + else + ensure_containerd_registry_config_path "io.containerd.grpc.v1.cri" '[plugins."io.containerd.grpc.v1.cri".registry]' + fi + + sudo mkdir -p "/etc/containerd/certs.d/$registry_endpoint" + sudo tee "/etc/containerd/certs.d/$registry_endpoint/hosts.toml" >/dev/null </dev/null; then + sudo containerd config dump || true + exit 1 + fi + if ! sudo systemctl restart containerd; then + sudo systemctl status containerd --no-pager -l || true + sudo journalctl -u containerd --no-pager -n 160 || true + exit 1 + fi +} + configure_node_dns install_missing_packages open-iscsi nfs-common sudo systemctl enable --now iscsid @@ -266,21 +518,7 @@ if ! getent hosts "${self.triggers.node_name}" >/dev/null; then printf '%s %s\n' "${self.triggers.host}" "${self.triggers.node_name}" | sudo tee -a /etc/hosts >/dev/null fi -sudo mkdir -p /etc/containerd -if [ ! -f /etc/containerd/config.toml ]; then - sudo containerd config default | sudo tee /etc/containerd/config.toml >/dev/null -fi -sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml -sudo sed -i 's#config_path = ""#config_path = "/etc/containerd/certs.d"#' /etc/containerd/config.toml -sudo mkdir -p /etc/containerd/certs.d/${self.triggers.registry_endpoint} -sudo tee /etc/containerd/certs.d/${self.triggers.registry_endpoint}/hosts.toml >/dev/null </dev/null || true + sudo rmdir "${path}" 2>/dev/null || true + fi + done +} + restore_node_dns() { sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf if sudo test -e /etc/resolv.conf.homelab-k8s-backup; then @@ -48,9 +59,14 @@ restore_node_dns() { cleanup_mounts() { if command -v findmnt >/dev/null 2>&1; then + local mount_root while IFS= read -r mountpoint; do sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true - done < <(findmnt -Rno TARGET /var/lib/kubelet /var/lib/containerd 2>/dev/null | sort -r) + done < <( + for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do + findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true + done | sort -ru + ) fi while IFS= read -r mountpoint; do sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true @@ -73,14 +89,13 @@ cleanup_node() { /var/lib/cni/ \ /etc/cni/net.d \ /run/flannel \ - /run/calico \ - /var/run/calico \ /var/lib/calico \ /var/log/calico \ /var/lib/containerd/* \ /run/containerd/* \ /etc/containerd/certs.d \ /etc/containerd/config.toml + cleanup_calico_runtime_files sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam cleanup_iptables @@ -104,6 +119,106 @@ website_registry_endpoint() { printf '%s\n' "${image%%/*}" } +dump_argocd_debug() { + local app="$1" + + kubectl --kubeconfig "${KUBECONFIG}" -n argocd get application "${app}" -o yaml || true + kubectl --kubeconfig "${KUBECONFIG}" -n argocd describe application "${app}" || true + kubectl --kubeconfig "${KUBECONFIG}" -n argocd get pods -o wide || true + kubectl --kubeconfig "${KUBECONFIG}" -n argocd logs deployment/argocd-repo-server --tail=120 || true + kubectl --kubeconfig "${KUBECONFIG}" -n argocd logs statefulset/argocd-application-controller --tail=120 || true +} + +dump_namespace_debug() { + local namespace="$1" + + kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get all -o wide || true + kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get pvc -o wide || true + kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" describe pods || true + kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true +} + +wait_for_namespace() { + local namespace="$1" + local app="$2" + local timeout_seconds="$3" + local elapsed=0 + + until kubectl --kubeconfig "${KUBECONFIG}" get namespace "${namespace}" >/dev/null 2>&1; do + if ((elapsed >= timeout_seconds)); then + echo "Timed out waiting for namespace ${namespace} from Argo CD app ${app}" >&2 + dump_argocd_debug "${app}" + exit 1 + fi + sleep 5 + elapsed=$((elapsed + 5)) + done +} + +wait_for_namespaced_resource() { + local namespace="$1" + local kind="$2" + local name="$3" + local app="$4" + local timeout_seconds="$5" + local elapsed=0 + + until kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get "${kind}/${name}" >/dev/null 2>&1; do + if ((elapsed >= timeout_seconds)); then + echo "Timed out waiting for ${kind}/${name} in namespace ${namespace} from Argo CD app ${app}" >&2 + dump_argocd_debug "${app}" + kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true + exit 1 + fi + sleep 5 + elapsed=$((elapsed + 5)) + done +} + +wait_for_deployment_ready() { + local namespace="$1" + local deployment="$2" + local app="$3" + local timeout_seconds="$4" + local desired_replicas + local ready_replicas + local elapsed=0 + + desired_replicas="$(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)" + desired_replicas="${desired_replicas:-1}" + + until ready_replicas="$(kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" get deployment "${deployment}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null)"; \ + (( ${ready_replicas:-0} >= desired_replicas )); do + if ((elapsed >= timeout_seconds)); then + echo "Timed out waiting for deployment/${deployment} in namespace ${namespace} to have ${desired_replicas} ready replicas" >&2 + dump_argocd_debug "${app}" + dump_namespace_debug "${namespace}" + exit 1 + fi + sleep 5 + elapsed=$((elapsed + 5)) + done +} + +recreate_pods_for_selector() { + local namespace="$1" + local selector="$2" + local app="$3" + + if ! kubectl --kubeconfig "${KUBECONFIG}" -n "${namespace}" delete pod -l "${selector}" --ignore-not-found --wait=true --timeout=120s; then + echo "Failed to recreate pods matching ${selector} in namespace ${namespace}" >&2 + dump_argocd_debug "${app}" + dump_namespace_debug "${namespace}" + exit 1 + fi +} + +refresh_argocd_application() { + local app="$1" + + kubectl --kubeconfig "${KUBECONFIG}" patch application "${app}" -n argocd --type merge -p '{"metadata":{"annotations":{"argocd.argoproj.io/refresh":"hard"}}}' >/dev/null +} + up() { local registry_endpoint @@ -141,7 +256,13 @@ EOF run_tofu_stack "bootstrap/platform" run_tofu_stack "bootstrap/apps" - kubectl --kubeconfig "${KUBECONFIG}" -n container-registry rollout status deployment/local-registry --timeout=300s + refresh_argocd_application container-registry + refresh_argocd_application gitea + refresh_argocd_application website-production + + wait_for_namespace container-registry container-registry 300 + wait_for_namespaced_resource container-registry deployment local-registry container-registry 300 + wait_for_deployment_ready container-registry local-registry container-registry 300 docker buildx build \ --network host \ @@ -151,7 +272,11 @@ EOF "${REPO_ROOT}/apps/website/" \ --push - kubectl --kubeconfig "${KUBECONFIG}" patch application website-production -n argocd --type merge -p '{"metadata":{"annotations":{"argocd.argoproj.io/refresh":"sync"}}}' + refresh_argocd_application website-production + wait_for_namespace website-production website-production 300 + wait_for_namespaced_resource website-production deployment php-website-deployment website-production 300 + recreate_pods_for_selector website-production app=php-website website-production + wait_for_deployment_ready website-production php-website-deployment website-production 300 echo "Deployment successfully completed." } @@ -200,6 +325,17 @@ cleanup_iptables() { fi } +cleanup_calico_runtime_files() { + local path + + for path in /run/calico /var/run/calico; do + if sudo test -e "${path}"; then + sudo find "${path}" -path '*/cgroup*' -prune -o -mindepth 1 -exec rm -rf -- {} + 2>/dev/null || true + sudo rmdir "${path}" 2>/dev/null || true + fi + done +} + restore_node_dns() { sudo rm -f /etc/systemd/resolved.conf.d/homelab-k8s.conf if sudo test -e /etc/resolv.conf.homelab-k8s-backup; then @@ -211,9 +347,14 @@ restore_node_dns() { cleanup_mounts() { if command -v findmnt >/dev/null 2>&1; then + local mount_root while IFS= read -r mountpoint; do sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true - done < <(findmnt -Rno TARGET /var/lib/kubelet /var/lib/containerd 2>/dev/null | sort -r) + done < <( + for mount_root in /var/lib/kubelet /var/lib/containerd /run/calico /run/calico/cgroup /var/run/calico /var/run/calico/cgroup; do + findmnt -Rno TARGET "${mount_root}" 2>/dev/null || true + done | sort -ru + ) fi while IFS= read -r mountpoint; do sudo umount -f "${mountpoint}" 2>/dev/null || sudo umount -l "${mountpoint}" 2>/dev/null || true @@ -235,14 +376,13 @@ sudo rm -rf \ /var/lib/cni/ \ /etc/cni/net.d \ /run/flannel \ - /run/calico \ - /var/run/calico \ /var/lib/calico \ /var/log/calico \ /var/lib/containerd/* \ /run/containerd/* \ /etc/containerd/certs.d \ /etc/containerd/config.toml +cleanup_calico_runtime_files sudo rm -f /opt/cni/bin/calico /opt/cni/bin/calico-ipam cleanup_iptables