diff --git a/README.md b/README.md index bb7760d..28c6d27 100644 --- a/README.md +++ b/README.md @@ -212,12 +212,12 @@ duplicate those PV manifests when you want storage on another node. - `node-role.kubernetes.io/worker=worker` on every worker so `kubectl get nodes` shows `worker` instead of `<none>` in the ROLES column -- `homelab.dev/node-role=control-plane` and `homelab.dev/storage=local` on the - Debian control plane -- `homelab.dev/node-role=edge-app` and `homelab.dev/storage=local` on the - Raspberry Pi worker -- `homelab.dev/node-role=app` and `homelab.dev/storage=nvme` on automated Pimox - worker clones +- `homelab.dev/node-role=control-plane`, `homelab.dev/storage=local`, and + `homelab.dev/workload-class=control-plane` on the Debian control plane +- `homelab.dev/node-role=edge-app`, `homelab.dev/storage=local`, and + `homelab.dev/workload-class=edge` on the Raspberry Pi worker +- `homelab.dev/node-role=app`, `homelab.dev/storage=nvme`, and + `homelab.dev/workload-class=platform` on automated Pimox worker clones Override `control_plane_node_labels`, `worker_node_labels`, `LAB_RASPBERRY_NODE_LABELS_JSON`, or `LAB_PIMOX_WORKER_NODE_LABELS_JSON` when @@ -227,14 +227,18 @@ OpenEBS hostpath PVs are node-local. Move workloads only after their storage and edge path are ready on the target node. Gitea is outside Kubernetes and is moved by changing the Raspberry Pi Docker install target instead. -The Prometheus stack control workloads are pinned to Pimox worker nodes by the -default `prometheus_stack_node_selector` (`homelab.dev/node-role=app` and -`homelab.dev/storage=nvme`). Because the Prometheus, Alertmanager, and Grafana -PVCs use retained local OpenEBS volumes, moving an existing install off the -Debian control plane requires discarding those PVCs.
Run +The stateless platform controllers are pinned to Pimox worker nodes through +`homelab.dev/workload-class=platform` and include hostname topology spread plus +preferred pod anti-affinity so future Argo CD, Kyverno, Prometheus operator, and +kube-state-metrics scheduling does not collapse onto the first worker that joins. +PVC-backed monitoring StatefulSets are intentionally treated separately because +their retained OpenEBS hostpath volumes are node-local. Run `./lab.sh move-prometheus-stack-workers` from the Debian host to label existing worker nodes, destroy only the existing `prometheus-stack` Helm release, delete -its retained PVC/PV objects, and recreate the stack on the worker selector. +its retained PVC/PV objects, and recreate the stack on the worker selector when +you intentionally accept losing that monitoring data. A planned monitoring data +migration should be handled as a separate maintenance task with backup, +delete/recreate or storage migration steps, and post-restore checks. The website and demos NodePorts are reachable from the OCI jump box through the Raspberry Pi Tailscale interface. `bootstrap/cluster` installs a persistent diff --git a/bootstrap/cluster/variables.tf b/bootstrap/cluster/variables.tf index c4bb0bf..f57685c 100644 --- a/bootstrap/cluster/variables.tf +++ b/bootstrap/cluster/variables.tf @@ -6,8 +6,9 @@ variable "control_plane_node_name" { variable "control_plane_node_labels" { type = map(string) default = { - "homelab.dev/node-role" = "control-plane" - "homelab.dev/storage" = "local" + "homelab.dev/node-role" = "control-plane" + "homelab.dev/storage" = "local" + "homelab.dev/workload-class" = "control-plane" } } diff --git a/bootstrap/platform/main.tf b/bootstrap/platform/main.tf index 9d9c74a..87c8cda 100644 --- a/bootstrap/platform/main.tf +++ b/bootstrap/platform/main.tf @@ -105,15 +105,169 @@ EOT var.metallb.l2_advertisement_enabled ? 
local.metallb_l2_advertisement_manifest : "", ])) + platform_topology_key = "kubernetes.io/hostname" prometheus_stack_node_selector = var.prometheus_stack_node_selector argocd_node_selector = { - "kubernetes.io/os" = "linux" - "homelab.dev/node-role" = "app" + "kubernetes.io/os" = "linux" + "homelab.dev/workload-class" = "platform" } kyverno_node_selector = { - "kubernetes.io/os" = "linux" - "homelab.dev/node-role" = "app" + "kubernetes.io/os" = "linux" + "homelab.dev/workload-class" = "platform" } + + argocd_component_label_values = { + application_set = "argocd-applicationset-controller" + controller = "argocd-application-controller" + dex = "argocd-dex-server" + notifications = "argocd-notifications-controller" + redis = "argocd-redis" + repo_server = "argocd-repo-server" + server = "argocd-server" + } + + argocd_component_match_labels = { + for component, name in local.argocd_component_label_values : component => { + "app.kubernetes.io/name" = name + } + } + + argocd_component_affinity = { + for component, labels in local.argocd_component_match_labels : component => { + podAntiAffinity = { + preferredDuringSchedulingIgnoredDuringExecution = [ + { + weight = 100 + podAffinityTerm = { + labelSelector = { + matchLabels = labels + } + topologyKey = local.platform_topology_key + } + }, + ] + } + } + } + + argocd_component_topology_spread_constraints = { + for component, labels in local.argocd_component_match_labels : component => [ + { + maxSkew = 1 + topologyKey = local.platform_topology_key + whenUnsatisfiable = "ScheduleAnyway" + labelSelector = { + matchLabels = labels + } + }, + ] + } + + kyverno_component_label_values = { + admissionController = "admission-controller" + backgroundController = "background-controller" + cleanupController = "cleanup-controller" + reportsController = "reports-controller" + } + + kyverno_component_match_labels = { + for component, name in local.kyverno_component_label_values : component => { + "app.kubernetes.io/component" = name + 
} + } + + kyverno_component_pod_anti_affinity = { + for component, labels in local.kyverno_component_match_labels : component => { + preferredDuringSchedulingIgnoredDuringExecution = [ + { + weight = 100 + podAffinityTerm = { + labelSelector = { + matchLabels = labels + } + topologyKey = local.platform_topology_key + } + }, + ] + } + } + + kyverno_component_topology_spread_constraints = { + for component, labels in local.kyverno_component_match_labels : component => [ + { + maxSkew = 1 + topologyKey = local.platform_topology_key + whenUnsatisfiable = "ScheduleAnyway" + labelSelector = { + matchLabels = labels + } + }, + ] + } + + prometheus_operator_match_labels = { + app = "kube-prometheus-stack-operator" + release = "prometheus-stack" + } + + kube_state_metrics_match_labels = { + "app.kubernetes.io/instance" = "prometheus-stack" + "app.kubernetes.io/name" = "kube-state-metrics" + } + + prometheus_operator_affinity = { + podAntiAffinity = { + preferredDuringSchedulingIgnoredDuringExecution = [ + { + weight = 100 + podAffinityTerm = { + labelSelector = { + matchLabels = local.prometheus_operator_match_labels + } + topologyKey = local.platform_topology_key + } + }, + ] + } + } + + kube_state_metrics_affinity = { + podAntiAffinity = { + preferredDuringSchedulingIgnoredDuringExecution = [ + { + weight = 100 + podAffinityTerm = { + labelSelector = { + matchLabels = local.kube_state_metrics_match_labels + } + topologyKey = local.platform_topology_key + } + }, + ] + } + } + + prometheus_operator_topology_spread_constraints = [ + { + maxSkew = 1 + topologyKey = local.platform_topology_key + whenUnsatisfiable = "ScheduleAnyway" + labelSelector = { + matchLabels = local.prometheus_operator_match_labels + } + }, + ] + + kube_state_metrics_topology_spread_constraints = [ + { + maxSkew = 1 + topologyKey = local.platform_topology_key + whenUnsatisfiable = "ScheduleAnyway" + labelSelector = { + matchLabels = local.kube_state_metrics_match_labels + } + }, + ] } resource 
"helm_release" "calico_crds" { @@ -678,6 +832,41 @@ resource "helm_release" "argocd" { global = { nodeSelector = local.argocd_node_selector } + applicationSet = { + nodeSelector = local.argocd_node_selector + affinity = local.argocd_component_affinity.application_set + topologySpreadConstraints = local.argocd_component_topology_spread_constraints.application_set + } + controller = { + nodeSelector = local.argocd_node_selector + affinity = local.argocd_component_affinity.controller + topologySpreadConstraints = local.argocd_component_topology_spread_constraints.controller + } + dex = { + nodeSelector = local.argocd_node_selector + affinity = local.argocd_component_affinity.dex + topologySpreadConstraints = local.argocd_component_topology_spread_constraints.dex + } + notifications = { + nodeSelector = local.argocd_node_selector + affinity = local.argocd_component_affinity.notifications + topologySpreadConstraints = local.argocd_component_topology_spread_constraints.notifications + } + redis = { + nodeSelector = local.argocd_node_selector + affinity = local.argocd_component_affinity.redis + topologySpreadConstraints = local.argocd_component_topology_spread_constraints.redis + } + repoServer = { + nodeSelector = local.argocd_node_selector + affinity = local.argocd_component_affinity.repo_server + topologySpreadConstraints = local.argocd_component_topology_spread_constraints.repo_server + } + server = { + nodeSelector = local.argocd_node_selector + affinity = local.argocd_component_affinity.server + topologySpreadConstraints = local.argocd_component_topology_spread_constraints.server + } }) ] } @@ -788,8 +977,10 @@ resource "helm_release" "kyverno" { } } admissionController = { - nodeSelector = local.kyverno_node_selector - replicas = 1 + nodeSelector = local.kyverno_node_selector + podAntiAffinity = local.kyverno_component_pod_anti_affinity.admissionController + replicas = 1 + topologySpreadConstraints = 
local.kyverno_component_topology_spread_constraints.admissionController resources = { requests = { cpu = "50m" @@ -801,8 +992,10 @@ resource "helm_release" "kyverno" { } } backgroundController = { - nodeSelector = local.kyverno_node_selector - replicas = 1 + nodeSelector = local.kyverno_node_selector + podAntiAffinity = local.kyverno_component_pod_anti_affinity.backgroundController + replicas = 1 + topologySpreadConstraints = local.kyverno_component_topology_spread_constraints.backgroundController resources = { requests = { cpu = "25m" @@ -814,8 +1007,10 @@ resource "helm_release" "kyverno" { } } cleanupController = { - nodeSelector = local.kyverno_node_selector - replicas = 1 + nodeSelector = local.kyverno_node_selector + podAntiAffinity = local.kyverno_component_pod_anti_affinity.cleanupController + replicas = 1 + topologySpreadConstraints = local.kyverno_component_topology_spread_constraints.cleanupController resources = { requests = { cpu = "10m" @@ -827,8 +1022,10 @@ resource "helm_release" "kyverno" { } } reportsController = { - nodeSelector = local.kyverno_node_selector - replicas = 1 + nodeSelector = local.kyverno_node_selector + podAntiAffinity = local.kyverno_component_pod_anti_affinity.reportsController + replicas = 1 + topologySpreadConstraints = local.kyverno_component_topology_spread_constraints.reportsController resources = { requests = { cpu = "25m" @@ -1134,7 +1331,9 @@ resource "helm_release" "prometheus_stack" { enabled = false } prometheusOperator = { - nodeSelector = local.prometheus_stack_node_selector + affinity = local.prometheus_operator_affinity + nodeSelector = local.prometheus_stack_node_selector + topologySpreadConstraints = local.prometheus_operator_topology_spread_constraints tls = { enabled = false } @@ -1242,7 +1441,9 @@ resource "helm_release" "prometheus_stack" { } } "kube-state-metrics" = { - nodeSelector = local.prometheus_stack_node_selector + affinity = local.kube_state_metrics_affinity + nodeSelector = 
local.prometheus_stack_node_selector + topologySpreadConstraints = local.kube_state_metrics_topology_spread_constraints } }) ] diff --git a/bootstrap/platform/variables.tf b/bootstrap/platform/variables.tf index e9cbefa..3b6cdcf 100644 --- a/bootstrap/platform/variables.tf +++ b/bootstrap/platform/variables.tf @@ -214,8 +214,8 @@ variable "prometheus_stack_node_selector" { description = "Node selector applied to kube-prometheus-stack control workloads so they stay off the control plane." type = map(string) default = { - "homelab.dev/node-role" = "app" - "homelab.dev/storage" = "nvme" + "kubernetes.io/os" = "linux" + "homelab.dev/workload-class" = "platform" } } diff --git a/lab.sh b/lab.sh index 0201666..8d4c566 100755 --- a/lab.sh +++ b/lab.sh @@ -189,6 +189,7 @@ ensure_homelab_node_labels() { kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \ homelab.dev/node-role=control-plane \ homelab.dev/storage=local \ + homelab.dev/workload-class=control-plane \ --overwrite continue fi @@ -201,11 +202,13 @@ ensure_homelab_node_labels() { kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \ homelab.dev/node-role=app \ homelab.dev/storage=nvme \ + homelab.dev/workload-class=platform \ --overwrite elif [[ "${node}" == "${raspberry_node}" ]]; then kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \ homelab.dev/node-role=edge-app \ homelab.dev/storage=local \ + homelab.dev/workload-class=edge \ --overwrite fi done < <(kubectl --kubeconfig "${KUBECONFIG_PATH}" get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}') @@ -691,8 +694,8 @@ write_cluster_worker_var_file() { LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \ LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \ LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \ - 
LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\"}}" \ - LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\"}}" \ + LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\",\"homelab.dev/workload-class\":\"edge\"\}}" \ + LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\",\"homelab.dev/workload-class\":\"platform\"\}}" \ python3 - "${spec_file}" "${var_file}" <<'PY' import json import os