# Cluster add-on stack: Calico CNI, NodeLocal DNSCache, MetalLB, Kyverno, and the
# observability charts (Loki, Mimir, Promtail, Prometheus stack), plus arbitrary extra Helm releases.
# Pins the core version (>= 1.0) and the helm (~> 2.12), kubernetes (~> 2.26) and null (~> 3.2) providers.
terraform { required_version = ">= 1.0" required_providers { helm = { source = "hashicorp/helm" version = "~> 2.12" } kubernetes = { source = "hashicorp/kubernetes" version = "~> 2.26" } null = { source = "hashicorp/null" version = "~> 3.2" } } }
# Both providers authenticate with the same kubeconfig file, var.kubeconfig_path.
provider "kubernetes" { config_path = var.kubeconfig_path }
provider "helm" { kubernetes { config_path = var.kubeconfig_path } }
# NOTE(review): this span looks truncated. There is no heredoc opener before `</dev/null`, yet an
# `EOT` terminator follows, and the text jumps from the nodelocal_dns_corefile local straight into a
# shell loop that uses self.triggers.* (only valid inside a resource). The Corefile body, any other
# locals (metallb_l2_manifests, kyverno_node_selector, prometheus_stack_node_selector are referenced
# later) and the head of the owning resource are not visible here -- presumably
# null_resource.calico_helm_recovery, which helm_release.calico depends on; confirm against the original file.
# What is visible: the loop reads tab-separated "secret status" pairs and deletes the secret whenever
# status is pending-install, pending-upgrade, pending-rollback, failed or uninstalling.
locals { nodelocal_dns_corefile = </dev/null | while IFS=$'\t' read -r secret status; do case "$status" in pending-install|pending-upgrade|pending-rollback|failed|uninstalling) kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" delete secret "$secret" ;; esac done EOT } }
# Installs the "tigera-operator" chart as release "calico" after null_resource.calico_helm_recovery.
# wait = false: Helm does not block on readiness here; null_resource.calico_ready below does the waiting.
# Values: chart-managed CRDs off; operator pinned to the Linux node named var.calico_operator_node_name;
# apiServer, goldmane and whisker disabled; one control-plane replica; Calico CNI with BGP disabled,
# node address autodetection restricted to var.calico_node_address_autodetection_cidrs, and a single
# VXLAN-encapsulated IP pool on var.pod_network_cidr.
resource "helm_release" "calico" { depends_on = [null_resource.calico_helm_recovery] name = "calico" repository = var.calico.repository chart = "tigera-operator" version = var.calico.version namespace = var.calico.namespace create_namespace = true timeout = 900 wait = false cleanup_on_fail = true values = [ yamlencode({ manageCRDs = false nodeSelector = { "kubernetes.io/os" = "linux" "kubernetes.io/hostname" = var.calico_operator_node_name } apiServer = { enabled = false } goldmane = { enabled = false } whisker = { enabled = false } installation = { controlPlaneReplicas = 1 cni = { type = "Calico" } calicoNetwork = { bgp = "Disabled" nodeAddressAutodetectionV4 = { cidrs = var.calico_node_address_autodetection_cidrs firstFound = false } ipPools = [ { cidr = var.pod_network_cidr encapsulation = "VXLAN" } ] } } }) ] }
# Readiness gate for Calico; most other resources in this file depend on it.
# Re-runs when the kubeconfig path, Calico version, pod CIDR or autodetection CIDR list changes (triggers).
# NOTE(review): the heredoc opener and the beginning of the script (shell options, the head of
# dump_calico_debug) are missing from this view; `command = </dev/null | tail -80` starts mid-script.
# Visible behaviour of the script:
#   - dump_calico_debug describes the tigera-operator deployment, prints its last 160 log lines, and
#     describes the calico-node daemonset and calico-kube-controllers deployment; every command is `|| true`.
#   - wait_for_resource KIND NAMESPACE NAME TIMEOUT polls `kubectl get` every 5 seconds until the object
#     exists; on timeout it prints a message to stderr, dumps debug output and exits 1.
#   - dump_calico_debug is also installed as the ERR trap.
#   - Sequence: wait for and roll out tigera-operator (300s), wait for calico-node and
#     calico-kube-controllers to exist (600s each), wait for both rollouts (600s each), then wait for
#     all nodes to report Ready (600s).
resource "null_resource" "calico_ready" { depends_on = [helm_release.calico] triggers = { kubeconfig_path = var.kubeconfig_path calico_version = var.calico.version pod_network_cidr = var.pod_network_cidr calico_node_address_autodetection_cidrs = join(",", var.calico_node_address_autodetection_cidrs) } provisioner "local-exec" { interpreter = ["/bin/bash", "-lc"] command = </dev/null | tail -80 || true done kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator 
describe deployment tigera-operator || true kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator logs deployment/tigera-operator --tail=160 || true kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system describe daemonset calico-node || true kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system describe deployment calico-kube-controllers || true } wait_for_resource() { kind="$1" namespace="$2" name="$3" timeout_seconds="$4" elapsed=0 until kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "$namespace" get "$kind/$name" >/dev/null 2>&1; do if [ "$elapsed" -ge "$timeout_seconds" ]; then echo "Timed out waiting for $kind/$name in namespace $namespace" >&2 dump_calico_debug exit 1 fi sleep 5 elapsed=$((elapsed + 5)) done } trap dump_calico_debug ERR wait_for_resource deployment tigera-operator tigera-operator 300 kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator rollout status deployment/tigera-operator --timeout=300s wait_for_resource daemonset calico-system calico-node 600 wait_for_resource deployment calico-system calico-kube-controllers 600 kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status daemonset/calico-node --timeout=600s kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status deployment/calico-kube-controllers --timeout=600s kubectl --kubeconfig "${self.triggers.kubeconfig_path}" wait --for=condition=Ready nodes --all --timeout=600s EOT } }
# NodeLocal DNS: ServiceAccount "node-local-dns" in kube-system.
# The for_each map has one key ("enabled") when var.nodelocal_dns.enabled is true and is empty
# otherwise, so the object exists only when the feature is switched on. The same toggle pattern is
# used by every nodelocal_dns_* and metallb resource below.
resource "kubernetes_manifest" "nodelocal_dns_service_account" { for_each = var.nodelocal_dns.enabled ? 
{ enabled = true } : {} depends_on = [null_resource.calico_ready] manifest = { apiVersion = "v1" kind = "ServiceAccount" metadata = { name = "node-local-dns" namespace = "kube-system" labels = { "kubernetes.io/cluster-service" = "true" "addonmanager.kubernetes.io/mode" = "Reconcile" "app.kubernetes.io/managed-by" = "opentofu" "app.kubernetes.io/part-of" = "nodelocal-dns" "app.kubernetes.io/name" = "node-local-dns" "homelab.dev/platform-component" = "nodelocal-dns" "homelab.dev/platform-component-id" = "dns-cache" } } } }
# NodeLocal DNS: Service "kube-dns-upstream" selecting pods labelled k8s-app=kube-dns on port 53
# (UDP and TCP). Its name matches the "-upstreamsvc" argument passed to the node-cache container below.
resource "kubernetes_manifest" "nodelocal_dns_upstream_service" { for_each = var.nodelocal_dns.enabled ? { enabled = true } : {} depends_on = [null_resource.calico_ready] manifest = { apiVersion = "v1" kind = "Service" metadata = { name = "kube-dns-upstream" namespace = "kube-system" labels = { "k8s-app" = "kube-dns" "kubernetes.io/cluster-service" = "true" "addonmanager.kubernetes.io/mode" = "Reconcile" "kubernetes.io/name" = "KubeDNSUpstream" } } spec = { ports = [ { name = "dns" port = 53 protocol = "UDP" targetPort = 53 }, { name = "dns-tcp" port = 53 protocol = "TCP" targetPort = 53 }, ] selector = { "k8s-app" = "kube-dns" } } } }
# NodeLocal DNS: ConfigMap "node-local-dns" whose single key "Corefile" holds local.nodelocal_dns_corefile.
resource "kubernetes_manifest" "nodelocal_dns_config_map" { for_each = var.nodelocal_dns.enabled ? { enabled = true } : {} depends_on = [null_resource.calico_ready] manifest = { apiVersion = "v1" kind = "ConfigMap" metadata = { name = "node-local-dns" namespace = "kube-system" labels = { "addonmanager.kubernetes.io/mode" = "Reconcile" "app.kubernetes.io/managed-by" = "opentofu" "app.kubernetes.io/name" = "node-local-dns" "app.kubernetes.io/part-of" = "nodelocal-dns" } } data = { Corefile = local.nodelocal_dns_corefile } } }
# NodeLocal DNS: DaemonSet "node-local-dns", created after the service account, upstream service and config map.
# Pod: hostNetwork with dnsPolicy "Default", priority class system-node-critical, Linux nodes only,
# tolerates CriticalAddonsOnly plus every NoExecute / NoSchedule taint.
# Container "node-cache": image var.nodelocal_dns.image, NET_ADMIN capability, listens on
# "<local_ip>,<cluster_dns_ip>", DNS on 53/UDP and 53/TCP, metrics on 9253, liveness probe GET /health on local_ip:8080.
# NOTE(review): the ConfigMap key is mounted as /etc/coredns/Corefile.base while "-conf" points at
# /etc/Corefile; presumably the node-cache binary generates the latter from the former -- verify
# against the image in use.
resource "kubernetes_manifest" "nodelocal_dns_daemonset" { for_each = var.nodelocal_dns.enabled ? 
{ enabled = true } : {} depends_on = [ kubernetes_manifest.nodelocal_dns_service_account, kubernetes_manifest.nodelocal_dns_upstream_service, kubernetes_manifest.nodelocal_dns_config_map, ] manifest = { apiVersion = "apps/v1" kind = "DaemonSet" metadata = { name = "node-local-dns" namespace = "kube-system" labels = { "k8s-app" = "node-local-dns" "kubernetes.io/cluster-service" = "true" "addonmanager.kubernetes.io/mode" = "Reconcile" } } spec = { updateStrategy = { rollingUpdate = { maxUnavailable = "10%" } } selector = { matchLabels = { "k8s-app" = "node-local-dns" } } template = { metadata = { labels = { "k8s-app" = "node-local-dns" } annotations = { "prometheus.io/port" = "9253" "prometheus.io/scrape" = "true" } } spec = { priorityClassName = "system-node-critical" serviceAccountName = "node-local-dns" hostNetwork = true dnsPolicy = "Default" nodeSelector = { "kubernetes.io/os" = "linux" } tolerations = [ { key = "CriticalAddonsOnly" operator = "Exists" }, { effect = "NoExecute" operator = "Exists" }, { effect = "NoSchedule" operator = "Exists" }, ] containers = [ { name = "node-cache" image = var.nodelocal_dns.image resources = { requests = { cpu = "25m" memory = "5Mi" } } args = [ "-localip", "${var.nodelocal_dns.local_ip},${var.nodelocal_dns.cluster_dns_ip}", "-conf", "/etc/Corefile", "-upstreamsvc", "kube-dns-upstream", ] securityContext = { capabilities = { add = ["NET_ADMIN"] } } ports = [ { containerPort = 53 name = "dns" protocol = "UDP" }, { containerPort = 53 name = "dns-tcp" protocol = "TCP" }, { containerPort = 9253 name = "metrics" protocol = "TCP" }, ] livenessProbe = { httpGet = { host = var.nodelocal_dns.local_ip path = "/health" port = 8080 } initialDelaySeconds = 60 timeoutSeconds = 5 } volumeMounts = [ { mountPath = "/run/xtables.lock" name = "xtables-lock" }, { mountPath = "/etc/coredns" name = "config-volume" }, { mountPath = "/etc/kube-dns" name = "kube-dns-config" }, ] }, ] volumes = [ { name = "xtables-lock" hostPath = { path = 
"/run/xtables.lock" type = "FileOrCreate" } }, { name = "kube-dns-config" configMap = { name = "kube-dns" optional = true } }, { name = "config-volume" configMap = { name = "node-local-dns" items = [ { key = "Corefile" path = "Corefile.base" }, ] } }, ] } } } } }
# NodeLocal DNS: headless Service (clusterIP "None") exposing the metrics port 9253 of the
# node-local-dns pods, annotated for Prometheus scraping.
resource "kubernetes_manifest" "nodelocal_dns_metrics_service" { for_each = var.nodelocal_dns.enabled ? { enabled = true } : {} depends_on = [kubernetes_manifest.nodelocal_dns_daemonset] manifest = { apiVersion = "v1" kind = "Service" metadata = { name = "node-local-dns" namespace = "kube-system" annotations = { "prometheus.io/port" = "9253" "prometheus.io/scrape" = "true" } labels = { "k8s-app" = "node-local-dns" } } spec = { clusterIP = "None" ports = [ { name = "metrics" port = 9253 targetPort = 9253 }, ] selector = { "k8s-app" = "node-local-dns" } } } }
# MetalLB chart, installed only when var.metallb.enabled; waits for readiness (wait = true) and
# turns the frrk8s sub-component off.
resource "helm_release" "metallb" { for_each = var.metallb.enabled ? { enabled = true } : {} depends_on = [null_resource.calico_ready] name = "metallb" repository = var.metallb.repository chart = "metallb" version = var.metallb.version namespace = var.metallb.namespace create_namespace = true timeout = 600 wait = true values = [ yamlencode({ frrk8s = { enabled = false } }) ] }
# MetalLB L2 configuration, applied only when MetalLB is enabled AND var.metallb.address_pool is non-empty.
# Re-runs when the kubeconfig path or the SHA-256 of local.metallb_l2_manifests changes
# (that local is not defined in the visible part of the file).
# NOTE(review): the script below is truncated and does not belong to this resource. It has no heredoc
# opener (`command = <&2 exit 1 fi`) and reads self.triggers.namespace, secret_name, repo_url and
# ssh_key_path, none of which are declared in the triggers above. The MetalLB apply script and the
# head of at least one other resource (an Argo CD repository-credentials step, judging by the object
# names) are missing from this view; confirm against the original file.
# Visible behaviour of the surviving script tail:
#   - creates two temp files and removes them on EXIT;
#   - loads the current ssh_known_hosts value of ConfigMap argocd-ssh-known-hosts-cm (errors ignored);
#   - appends `ssh-keyscan -H` output for repo_host, de-duplicates with `sort -u`, and applies the
#     ConfigMap via a client-side dry-run piped into `kubectl apply`;
#   - applies a generic Secret (type=git, url, sshPrivateKey from the key file) the same way and
#     labels it argocd.argoproj.io/secret-type=repository.
resource "null_resource" "metallb_l2_config" { for_each = var.metallb.enabled && length(var.metallb.address_pool) > 0 ? 
{ enabled = true } : {} depends_on = [helm_release.metallb] triggers = { kubeconfig_path = var.kubeconfig_path manifest_hash = sha256(local.metallb_l2_manifests) } provisioner "local-exec" { interpreter = ["/bin/bash", "-lc"] command = <&2 exit 1 fi known_hosts_file="$(mktemp)" known_hosts_sorted="$(mktemp)" trap 'rm -f "$${known_hosts_file}" "$${known_hosts_sorted}"' EXIT kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" get configmap argocd-ssh-known-hosts-cm \ -o jsonpath='{.data.ssh_known_hosts}' > "$${known_hosts_file}" 2>/dev/null || true ssh-keyscan -H "$${repo_host}" >> "$${known_hosts_file}" 2>/dev/null sort -u "$${known_hosts_file}" > "$${known_hosts_sorted}" kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" create configmap argocd-ssh-known-hosts-cm \ --from-file=ssh_known_hosts="$${known_hosts_sorted}" \ --dry-run=client -o yaml | kubectl --kubeconfig "${self.triggers.kubeconfig_path}" apply -f - kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" create secret generic "${self.triggers.secret_name}" \ --from-literal=type=git \ --from-literal=url="${self.triggers.repo_url}" \ --from-file=sshPrivateKey="${self.triggers.ssh_key_path}" \ --dry-run=client -o yaml | kubectl --kubeconfig "${self.triggers.kubeconfig_path}" apply -f - kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" label secret "${self.triggers.secret_name}" \ argocd.argoproj.io/secret-type=repository --overwrite EOT } }
# Kyverno chart, installed once Calico is ready. Every controller runs a single replica with explicit
# CPU/memory requests and a memory limit, and every component (including the CRD migration and
# webhook cleanup jobs) is scheduled via local.kyverno_node_selector (not defined in the visible part of the file).
resource "helm_release" "kyverno" { depends_on = [null_resource.calico_ready] name = "kyverno" repository = var.kyverno.repository chart = "kyverno" version = var.kyverno.chart_version namespace = var.kyverno.namespace create_namespace = true timeout = 900 wait = true values = [ yamlencode({ crds = { migration = { nodeSelector = local.kyverno_node_selector } } admissionController = { nodeSelector = local.kyverno_node_selector 
replicas = 1 resources = { requests = { cpu = "50m" memory = "128Mi" } limits = { memory = "384Mi" } } } backgroundController = { nodeSelector = local.kyverno_node_selector replicas = 1 resources = { requests = { cpu = "25m" memory = "96Mi" } limits = { memory = "256Mi" } } } cleanupController = { nodeSelector = local.kyverno_node_selector replicas = 1 resources = { requests = { cpu = "10m" memory = "64Mi" } limits = { memory = "192Mi" } } } reportsController = { nodeSelector = local.kyverno_node_selector replicas = 1 resources = { requests = { cpu = "25m" memory = "96Mi" } limits = { memory = "256Mi" } } } webhooksCleanup = { nodeSelector = local.kyverno_node_selector } }) ] }
# Kyverno policy bundle, installed into the existing Kyverno namespace after the engine itself.
# Configured as the "baseline" pod security standard with validationFailureAction "Audit" and
# failurePolicy "Ignore".
resource "helm_release" "kyverno_policies" { depends_on = [helm_release.kyverno] name = "kyverno-policies" repository = var.kyverno.repository chart = "kyverno-policies" version = var.kyverno.policies_version namespace = var.kyverno.namespace create_namespace = false timeout = 600 wait = true values = [ yamlencode({ podSecurityStandard = "baseline" podSecuritySeverity = "medium" validationFailureAction = "Audit" validationAllowExistingViolations = true failurePolicy = "Ignore" }) ] }
# Loki in "SingleBinary" deployment mode inside var.observability.namespace.
# Depends on kubernetes_namespace_v1.monitoring, which is not defined in the visible part of the file.
# Values: auth disabled, replication factor 1, filesystem storage with a tsdb / v13 schema effective
# from 2024-04-01, compactor retention enabled with var.observability.loki.retention_period.
# The single replica keeps its PVC on scale-down and delete ("Retain") on
# var.openebs.retain_storage_class. read / write / backend replicas are 0 and the gateway, caches,
# canary and chart test are disabled.
resource "helm_release" "loki" { depends_on = [kubernetes_namespace_v1.monitoring] name = "loki" repository = var.observability.loki.repository chart = var.observability.loki.chart version = var.observability.loki.version namespace = var.observability.namespace create_namespace = false timeout = 900 wait = true values = [ yamlencode({ deploymentMode = "SingleBinary" loki = { auth_enabled = false commonConfig = { replication_factor = 1 } storage = { type = "filesystem" } schemaConfig = { configs = [ { from = "2024-04-01" store = "tsdb" object_store = "filesystem" schema = "v13" index = { prefix = "loki_index_" period = "24h" } } ] } limits_config = { retention_period = var.observability.loki.retention_period } compactor = { retention_enabled = true delete_request_store = "filesystem" working_directory = 
"/var/loki/compactor" } } singleBinary = { replicas = 1 affinity = {} persistence = { enabled = true whenScaled = "Retain" whenDeleted = "Retain" enableStatefulSetAutoDeletePVC = false storageClass = var.openebs.retain_storage_class size = var.observability.loki.storage_size } resources = { requests = { cpu = "50m" memory = "256Mi" } limits = { memory = "768Mi" } } } read = { replicas = 0 } write = { replicas = 0 } backend = { replicas = 0 } gateway = { enabled = false } chunksCache = { enabled = false } resultsCache = { enabled = false } lokiCanary = { enabled = false } test = { enabled = false } }) ] }
# Mimir, created only when var.observability.mimir.enabled (count of 0 or 1).
# Values: multitenancy off, ingester ring replication factor 1, one replica per listed component,
# zone-aware replication disabled for alertmanager / ingester / store_gateway, persistent volumes on
# var.openebs.retain_storage_class, nginx kept (1 replica) while gateway and rollout_operator are disabled.
resource "helm_release" "mimir" { count = var.observability.mimir.enabled ? 1 : 0 depends_on = [kubernetes_namespace_v1.monitoring] name = "mimir" repository = var.observability.mimir.repository chart = var.observability.mimir.chart version = var.observability.mimir.version namespace = var.observability.namespace create_namespace = false timeout = 1200 wait = true values = [ yamlencode({ mimir = { structuredConfig = { multitenancy_enabled = false ingester = { ring = { replication_factor = 1 } } } } alertmanager = { persistentVolume = { storageClass = var.openebs.retain_storage_class size = var.observability.mimir.alertmanager_storage_size } zoneAwareReplication = { enabled = false } } ingester = { replicas = 1 persistentVolume = { storageClass = var.openebs.retain_storage_class size = var.observability.mimir.ingester_storage_size } resources = { requests = { cpu = "100m" memory = "512Mi" } limits = { memory = "1Gi" } } zoneAwareReplication = { enabled = false } } store_gateway = { replicas = 1 persistentVolume = { storageClass = var.openebs.retain_storage_class size = var.observability.mimir.store_gateway_storage_size } zoneAwareReplication = { enabled = false } } compactor = { replicas = 1 persistentVolume = { storageClass = var.openebs.retain_storage_class size = var.observability.mimir.compactor_storage_size } } distributor = { replicas = 1 } querier = { 
replicas = 1 } query_frontend = { replicas = 1 } query_scheduler = { replicas = 1 } ruler = { replicas = 1 } minio = { persistence = { storageClass = var.openebs.retain_storage_class size = var.observability.mimir.minio_storage_size } resources = { requests = { cpu = "50m" memory = "128Mi" } limits = { memory = "512Mi" } } } nginx = { replicas = 1 } gateway = { enabled = false } rollout_operator = { enabled = false } }) ] }
# Promtail, installed after Loki; pushes logs to the in-cluster Loki endpoint
# http://loki.<namespace>.svc:3100/loki/api/v1/push.
resource "helm_release" "promtail" { depends_on = [helm_release.loki] name = "promtail" repository = var.observability.promtail.repository chart = var.observability.promtail.chart version = var.observability.promtail.version namespace = var.observability.namespace create_namespace = false timeout = 600 wait = true values = [ yamlencode({ config = { clients = [ { url = "http://loki.${var.observability.namespace}.svc:3100/loki/api/v1/push" } ] } resources = { requests = { cpu = "25m" memory = "64Mi" } limits = { memory = "128Mi" } } }) ] }
# Prometheus stack (operator, Alertmanager, Prometheus, Grafana, kube-state-metrics), installed after Loki and Mimir.
# Values: the kubeControllerManager / kubeEtcd / kubeProxy / kubeScheduler toggles are disabled;
# operator TLS and admission webhooks are off; all components use local.prometheus_stack_node_selector
# (not defined in the visible part of the file) and volumes on var.openebs.retain_storage_class.
# remoteWrite targets mimir-nginx only when both var.observability.mimir.enabled and
# var.observability.prometheus.remote_write_mimir_enabled are true; otherwise it is an empty list.
# Grafana always gets a Loki data source, plus a Mimir data source when Mimir is enabled.
resource "helm_release" "prometheus_stack" { depends_on = [helm_release.loki, helm_release.mimir] name = "prometheus-stack" repository = var.observability.prometheus.repository chart = var.observability.prometheus.chart version = var.observability.prometheus.version namespace = var.observability.namespace create_namespace = false timeout = 1200 wait = true values = [ yamlencode({ kubeControllerManager = { enabled = false } kubeEtcd = { enabled = false } kubeProxy = { enabled = false } kubeScheduler = { enabled = false } prometheusOperator = { nodeSelector = local.prometheus_stack_node_selector tls = { enabled = false } admissionWebhooks = { enabled = false } resources = { requests = { cpu = "50m" memory = "128Mi" } limits = { memory = "384Mi" } } } alertmanager = { alertmanagerSpec = { nodeSelector = local.prometheus_stack_node_selector storage = { volumeClaimTemplate = { spec = { storageClassName = var.openebs.retain_storage_class accessModes = ["ReadWriteOnce"] resources = { requests = { storage = 
var.observability.prometheus.alertmanager_storage_size } } } } } } } prometheus = { prometheusSpec = { nodeSelector = local.prometheus_stack_node_selector retention = var.observability.prometheus.retention resources = { requests = { cpu = "100m" memory = "512Mi" } limits = { memory = "1Gi" } } remoteWrite = var.observability.mimir.enabled && var.observability.prometheus.remote_write_mimir_enabled ? [ { url = "http://mimir-nginx.${var.observability.namespace}.svc/api/v1/push" } ] : [] storageSpec = { volumeClaimTemplate = { spec = { storageClassName = var.openebs.retain_storage_class accessModes = ["ReadWriteOnce"] resources = { requests = { storage = var.observability.prometheus.storage_size } } } } } } } grafana = { nodeSelector = local.prometheus_stack_node_selector persistence = { enabled = true type = "sts" storageClassName = var.openebs.retain_storage_class accessModes = ["ReadWriteOnce"] size = var.observability.prometheus.grafana_storage_size } additionalDataSources = concat( [ { name = "Loki" type = "loki" access = "proxy" url = "http://loki.${var.observability.namespace}.svc:3100" isDefault = false } ], var.observability.mimir.enabled ? [ { name = "Mimir" type = "prometheus" access = "proxy" url = "http://mimir-nginx.${var.observability.namespace}.svc/prometheus" isDefault = false } ] : [] ) resources = { requests = { cpu = "50m" memory = "128Mi" } limits = { memory = "384Mi" } } } "kube-state-metrics" = { nodeSelector = local.prometheus_stack_node_selector } }) ] }
# One Helm release per entry of var.extra_helm_releases (map key = release name), installed once Calico is ready.
# An empty version string becomes null, and an empty values_yaml yields no values documents.
# Each key/value pair of set_values is emitted as a `set` block.
resource "helm_release" "extra_tools" { for_each = var.extra_helm_releases depends_on = [null_resource.calico_ready] name = each.key repository = each.value.repository chart = each.value.chart version = each.value.version != "" ? each.value.version : null namespace = each.value.namespace create_namespace = each.value.create_namespace timeout = each.value.timeout values = each.value.values_yaml != "" ? 
[each.value.values_yaml] : [] dynamic "set" { for_each = each.value.set_values content { name = set.key value = set.value } } }