my-homelab-configs/bootstrap/platform/main.tf

1210 lines
32 KiB
HCL

# Core settings: minimum CLI version and the pinned provider plugins this
# configuration needs.
terraform {
  required_version = ">= 1.0"

  required_providers {
    # Provides the helm_release resources.
    helm = {
      version = "~> 2.12"
      source  = "hashicorp/helm"
    }

    # Provides the kubernetes_* resources.
    kubernetes = {
      version = "~> 2.26"
      source  = "hashicorp/kubernetes"
    }

    # Provides the null_resource blocks that run local-exec scripts.
    null = {
      version = "~> 3.2"
      source  = "hashicorp/null"
    }
  }
}
# Kubernetes provider: reads cluster credentials from the kubeconfig file at
# var.kubeconfig_path.
provider "kubernetes" {
  config_path = var.kubeconfig_path
}
# Helm provider: uses the same kubeconfig file as the kubernetes provider.
provider "helm" {
  kubernetes {
    config_path = var.kubeconfig_path
  }
}
locals {
  # CoreDNS Corefile stored in the node-local-dns ConfigMap (key "Corefile").
  #
  # Zones, in order: the cluster domain, in-addr.arpa, ip6.arpa, and the
  # catch-all ".". Every zone binds to both var.nodelocal_dns.local_ip and
  # var.nodelocal_dns.cluster_dns_ip.
  #
  # Because the cache itself listens on cluster_dns_ip, the three cluster
  # zones must not forward to that same address: the query would come straight
  # back to this process and the `loop` plugin would flag it. They forward to
  # the __PILLAR__CLUSTER__DNS__ placeholder instead. Per the upstream
  # NodeLocal DNSCache documentation, node-cache substitutes that placeholder
  # at startup with the ClusterIP of the Service passed via -upstreamsvc
  # (kube-dns-upstream in this configuration).
  #
  # The "." zone forwards to the explicit var.nodelocal_dns.upstream_dns_servers
  # list, space-joined.
  #
  # The heredoc body is kept at column 0 on purpose: with <<EOT every leading
  # space would become part of the rendered Corefile.
  nodelocal_dns_corefile = <<EOT
${var.nodelocal_dns.cluster_domain}:53 {
errors
cache {
success 9984 30
denial 9984 5
}
reload
loop
bind ${var.nodelocal_dns.local_ip} ${var.nodelocal_dns.cluster_dns_ip}
forward . __PILLAR__CLUSTER__DNS__ {
force_tcp
}
prometheus :9253
health ${var.nodelocal_dns.local_ip}:8080
}
in-addr.arpa:53 {
errors
cache 30
reload
loop
bind ${var.nodelocal_dns.local_ip} ${var.nodelocal_dns.cluster_dns_ip}
forward . __PILLAR__CLUSTER__DNS__ {
force_tcp
}
prometheus :9253
}
ip6.arpa:53 {
errors
cache 30
reload
loop
bind ${var.nodelocal_dns.local_ip} ${var.nodelocal_dns.cluster_dns_ip}
forward . __PILLAR__CLUSTER__DNS__ {
force_tcp
}
prometheus :9253
}
.:53 {
errors
cache 30
reload
loop
bind ${var.nodelocal_dns.local_ip} ${var.nodelocal_dns.cluster_dns_ip}
forward . ${join(" ", var.nodelocal_dns.upstream_dns_servers)}
prometheus :9253
}
EOT
}
# Installs the Calico CRD chart as its own release. The operator release
# (helm_release.calico) sets manageCRDs = false, so the CRDs come from here.
resource "helm_release" "calico_crds" {
  name       = "calico-crds"
  chart      = "crd.projectcalico.org.v1"
  repository = var.calico.repository
  version    = var.calico.version

  namespace        = var.calico.namespace
  create_namespace = true
}
# Pre-install cleanup for the "calico" Helm release.
#
# Lists the Helm release-state Secrets (labels owner=helm,name=calico) in the
# Calico namespace and deletes every one whose `status` label is
# pending-install, pending-upgrade, pending-rollback, failed or uninstalling.
# helm_release.calico depends on this resource, so the cleanup runs first.
resource "null_resource" "calico_helm_recovery" {
depends_on = [helm_release.calico_crds]
# The script reads these values through self.triggers. As with any
# null_resource, changing one of them replaces the resource and re-runs the
# provisioner; otherwise the script only runs when the resource is created.
triggers = {
kubeconfig_path = var.kubeconfig_path
namespace = var.calico.namespace
release_name = "calico"
release_version = var.calico.version
}
provisioner "local-exec" {
# Login shell (-l) with the heredoc passed as a single -c command string.
interpreter = ["/bin/bash", "-lc"]
# The jsonpath template prints "<secret name>\t<status label>" per Secret;
# the while loop splits on the tab and deletes matching Secrets one by one.
# stderr of the listing call is discarded.
command = <<EOT
set -euo pipefail
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" get secrets \
-l "owner=helm,name=${self.triggers.release_name}" \
-o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.labels.status}{"\n"}{end}' 2>/dev/null |
while IFS=$'\t' read -r secret status; do
case "$status" in
pending-install|pending-upgrade|pending-rollback|failed|uninstalling)
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" delete secret "$secret"
;;
esac
done
EOT
}
}
# Installs the tigera-operator chart as release "calico" and hands it the
# Installation settings: Calico CNI, BGP disabled, one VXLAN IP pool.
resource "helm_release" "calico" {
  name       = "calico"
  chart      = "tigera-operator"
  repository = var.calico.repository
  version    = var.calico.version

  namespace        = var.calico.namespace
  create_namespace = true

  # Helm does not block on readiness here; null_resource.calico_ready polls
  # the rollout after this release has been applied.
  wait            = false
  timeout         = 900
  cleanup_on_fail = true

  values = [
    yamlencode({
      # CRDs are delivered by the separate calico-crds release.
      manageCRDs = false

      # Pin the operator pod to one named Linux node.
      nodeSelector = {
        "kubernetes.io/hostname" = var.calico_operator_node_name
        "kubernetes.io/os"       = "linux"
      }

      # Optional components, all switched off.
      apiServer = { enabled = false }
      goldmane  = { enabled = false }
      whisker   = { enabled = false }

      installation = {
        controlPlaneReplicas = 1

        cni = {
          type = "Calico"
        }

        calicoNetwork = {
          bgp = "Disabled"

          # Node IPv4 address is picked by CIDR match; first-found is
          # explicitly turned off.
          nodeAddressAutodetectionV4 = {
            firstFound = false
            cidrs      = var.calico_node_address_autodetection_cidrs
          }

          ipPools = [
            {
              cidr          = var.pod_network_cidr
              encapsulation = "VXLAN"
            }
          ]
        }
      }
    })
  ]

  depends_on = [null_resource.calico_helm_recovery]
}
# Readiness gate for Calico. Runs after helm_release.calico (which is applied
# with wait = false) and blocks until the operator, calico-node and
# calico-kube-controllers have rolled out and every node reports Ready.
# Other resources in this file use this resource as their dependency.
resource "null_resource" "calico_ready" {
depends_on = [helm_release.calico]
# kubeconfig_path is read by the script through self.triggers; the other keys
# are not referenced by the script and exist so that a change to them
# replaces this resource and re-runs the wait.
triggers = {
kubeconfig_path = var.kubeconfig_path
calico_version = var.calico.version
pod_network_cidr = var.pod_network_cidr
calico_node_address_autodetection_cidrs = join(",", var.calico_node_address_autodetection_cidrs)
}
provisioner "local-exec" {
interpreter = ["/bin/bash", "-lc"]
# Script outline:
#   dump_calico_debug  - best-effort diagnostics (nodes, pods, recent events,
#                        operator describe/logs, calico-node and
#                        kube-controllers describe); each call is followed by
#                        `|| true` so a failure does not abort the dump.
#   wait_for_resource  - args: kind, namespace, name, timeout in seconds.
#                        Polls `kubectl get` every 5 seconds until the object
#                        exists; on timeout prints an error, dumps diagnostics
#                        and exits 1.
#   main sequence      - installs dump_calico_debug as the ERR trap, waits for
#                        the operator deployment to exist and roll out (300s
#                        each), waits for the calico-node daemonset and
#                        calico-kube-controllers deployment to exist (600s
#                        each), waits for both rollouts (600s each), then
#                        waits for all nodes to be Ready (600s).
command = <<EOT
set -euo pipefail
dump_calico_debug() {
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" get nodes -o wide || true
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" get pods -A -o wide || true
for ns in tigera-operator calico-system kube-system; do
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "$ns" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true
done
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator describe deployment tigera-operator || true
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator logs deployment/tigera-operator --tail=160 || true
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system describe daemonset calico-node || true
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system describe deployment calico-kube-controllers || true
}
wait_for_resource() {
kind="$1"
namespace="$2"
name="$3"
timeout_seconds="$4"
elapsed=0
until kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "$namespace" get "$kind/$name" >/dev/null 2>&1; do
if [ "$elapsed" -ge "$timeout_seconds" ]; then
echo "Timed out waiting for $kind/$name in namespace $namespace" >&2
dump_calico_debug
exit 1
fi
sleep 5
elapsed=$((elapsed + 5))
done
}
trap dump_calico_debug ERR
wait_for_resource deployment tigera-operator tigera-operator 300
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator rollout status deployment/tigera-operator --timeout=300s
wait_for_resource daemonset calico-system calico-node 600
wait_for_resource deployment calico-system calico-kube-controllers 600
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status daemonset/calico-node --timeout=600s
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status deployment/calico-kube-controllers --timeout=600s
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" wait --for=condition=Ready nodes --all --timeout=600s
EOT
}
}
# ServiceAccount "node-local-dns" in kube-system, referenced by the
# node-local-dns DaemonSet. Created only when var.nodelocal_dns.enabled is
# true.
resource "kubernetes_manifest" "nodelocal_dns_service_account" {
  # Zero-or-one instance toggle, keyed "enabled".
  for_each = var.nodelocal_dns.enabled ? { enabled = true } : {}

  manifest = {
    apiVersion = "v1"
    kind       = "ServiceAccount"

    metadata = {
      name      = "node-local-dns"
      namespace = "kube-system"

      labels = {
        "addonmanager.kubernetes.io/mode"   = "Reconcile"
        "app.kubernetes.io/managed-by"      = "opentofu"
        "app.kubernetes.io/name"            = "node-local-dns"
        "app.kubernetes.io/part-of"         = "nodelocal-dns"
        "homelab.dev/platform-component"    = "nodelocal-dns"
        "homelab.dev/platform-component-id" = "dns-cache"
        "kubernetes.io/cluster-service"     = "true"
      }
    }
  }

  depends_on = [null_resource.calico_ready]
}
# Service "kube-dns-upstream" in kube-system. Selects the k8s-app=kube-dns
# pods and exposes DNS on port 53 over UDP and TCP. The node-local-dns
# container is started with "-upstreamsvc kube-dns-upstream".
resource "kubernetes_manifest" "nodelocal_dns_upstream_service" {
  # Zero-or-one instance toggle, keyed "enabled".
  for_each = var.nodelocal_dns.enabled ? { enabled = true } : {}

  manifest = {
    apiVersion = "v1"
    kind       = "Service"

    metadata = {
      name      = "kube-dns-upstream"
      namespace = "kube-system"

      labels = {
        "addonmanager.kubernetes.io/mode" = "Reconcile"
        "k8s-app"                         = "kube-dns"
        "kubernetes.io/cluster-service"   = "true"
        "kubernetes.io/name"              = "KubeDNSUpstream"
      }
    }

    spec = {
      selector = {
        "k8s-app" = "kube-dns"
      }

      ports = [
        {
          name       = "dns"
          protocol   = "UDP"
          port       = 53
          targetPort = 53
        },
        {
          name       = "dns-tcp"
          protocol   = "TCP"
          port       = 53
          targetPort = 53
        },
      ]
    }
  }

  depends_on = [null_resource.calico_ready]
}
# ConfigMap "node-local-dns" in kube-system holding the rendered Corefile
# (local.nodelocal_dns_corefile) under the key "Corefile".
resource "kubernetes_manifest" "nodelocal_dns_config_map" {
  # Zero-or-one instance toggle, keyed "enabled".
  for_each = var.nodelocal_dns.enabled ? { enabled = true } : {}

  manifest = {
    apiVersion = "v1"
    kind       = "ConfigMap"

    metadata = {
      name      = "node-local-dns"
      namespace = "kube-system"

      labels = {
        "addonmanager.kubernetes.io/mode" = "Reconcile"
        "app.kubernetes.io/managed-by"    = "opentofu"
        "app.kubernetes.io/name"          = "node-local-dns"
        "app.kubernetes.io/part-of"       = "nodelocal-dns"
      }
    }

    data = {
      Corefile = local.nodelocal_dns_corefile
    }
  }

  depends_on = [null_resource.calico_ready]
}
# DaemonSet "node-local-dns" in kube-system: one host-network "node-cache"
# pod per Linux node, listening on var.nodelocal_dns.local_ip and
# var.nodelocal_dns.cluster_dns_ip. Applied after the ServiceAccount, the
# upstream Service and the ConfigMap it refers to.
resource "kubernetes_manifest" "nodelocal_dns_daemonset" {
  # Zero-or-one instance toggle, keyed "enabled".
  for_each = var.nodelocal_dns.enabled ? { enabled = true } : {}

  manifest = {
    apiVersion = "apps/v1"
    kind       = "DaemonSet"

    metadata = {
      name      = "node-local-dns"
      namespace = "kube-system"

      labels = {
        "addonmanager.kubernetes.io/mode" = "Reconcile"
        "k8s-app"                         = "node-local-dns"
        "kubernetes.io/cluster-service"   = "true"
      }
    }

    spec = {
      selector = {
        matchLabels = {
          "k8s-app" = "node-local-dns"
        }
      }

      updateStrategy = {
        rollingUpdate = {
          maxUnavailable = "10%"
        }
      }

      template = {
        metadata = {
          labels = {
            "k8s-app" = "node-local-dns"
          }

          # Scrape hints for the metrics port declared below.
          annotations = {
            "prometheus.io/scrape" = "true"
            "prometheus.io/port"   = "9253"
          }
        }

        spec = {
          serviceAccountName = "node-local-dns"
          priorityClassName  = "system-node-critical"
          hostNetwork        = true
          dnsPolicy          = "Default"

          nodeSelector = {
            "kubernetes.io/os" = "linux"
          }

          # Tolerates CriticalAddonsOnly plus every NoExecute and NoSchedule
          # taint.
          tolerations = [
            {
              key      = "CriticalAddonsOnly"
              operator = "Exists"
            },
            {
              effect   = "NoExecute"
              operator = "Exists"
            },
            {
              effect   = "NoSchedule"
              operator = "Exists"
            },
          ]

          containers = [
            {
              name  = "node-cache"
              image = var.nodelocal_dns.image

              args = [
                "-localip",
                "${var.nodelocal_dns.local_ip},${var.nodelocal_dns.cluster_dns_ip}",
                "-conf",
                "/etc/Corefile",
                "-upstreamsvc",
                "kube-dns-upstream",
              ]

              resources = {
                requests = {
                  cpu    = "25m"
                  memory = "5Mi"
                }
              }

              securityContext = {
                capabilities = {
                  add = ["NET_ADMIN"]
                }
              }

              ports = [
                {
                  name          = "dns"
                  containerPort = 53
                  protocol      = "UDP"
                },
                {
                  name          = "dns-tcp"
                  containerPort = 53
                  protocol      = "TCP"
                },
                {
                  name          = "metrics"
                  containerPort = 9253
                  protocol      = "TCP"
                },
              ]

              # Same address and port as the Corefile's `health` directive.
              livenessProbe = {
                initialDelaySeconds = 60
                timeoutSeconds      = 5

                httpGet = {
                  host = var.nodelocal_dns.local_ip
                  port = 8080
                  path = "/health"
                }
              }

              volumeMounts = [
                {
                  name      = "xtables-lock"
                  mountPath = "/run/xtables.lock"
                  readOnly  = false
                },
                {
                  name      = "config-volume"
                  mountPath = "/etc/coredns"
                },
                {
                  name      = "kube-dns-config"
                  mountPath = "/etc/kube-dns"
                },
              ]
            },
          ]

          volumes = [
            {
              name = "xtables-lock"
              hostPath = {
                path = "/run/xtables.lock"
                type = "FileOrCreate"
              }
            },
            {
              # The kube-dns ConfigMap is optional.
              name = "kube-dns-config"
              configMap = {
                name     = "kube-dns"
                optional = true
              }
            },
            {
              # Projects the "Corefile" key as the file Corefile.base under
              # the /etc/coredns mount.
              name = "config-volume"
              configMap = {
                name = "node-local-dns"
                items = [
                  {
                    key  = "Corefile"
                    path = "Corefile.base"
                  },
                ]
              }
            },
          ]
        }
      }
    }
  }

  depends_on = [
    kubernetes_manifest.nodelocal_dns_service_account,
    kubernetes_manifest.nodelocal_dns_upstream_service,
    kubernetes_manifest.nodelocal_dns_config_map,
  ]
}
# Headless Service (clusterIP "None") exposing the node-local-dns metrics
# port 9253 for the pods labelled k8s-app=node-local-dns.
resource "kubernetes_manifest" "nodelocal_dns_metrics_service" {
  # Zero-or-one instance toggle, keyed "enabled".
  for_each = var.nodelocal_dns.enabled ? { enabled = true } : {}

  manifest = {
    apiVersion = "v1"
    kind       = "Service"

    metadata = {
      name      = "node-local-dns"
      namespace = "kube-system"

      labels = {
        "k8s-app" = "node-local-dns"
      }

      annotations = {
        "prometheus.io/scrape" = "true"
        "prometheus.io/port"   = "9253"
      }
    }

    spec = {
      clusterIP = "None"

      selector = {
        "k8s-app" = "node-local-dns"
      }

      ports = [
        {
          name       = "metrics"
          port       = 9253
          targetPort = 9253
        },
      ]
    }
  }

  depends_on = [kubernetes_manifest.nodelocal_dns_daemonset]
}
# MetalLB chart, installed only when var.metallb.enabled is true. The chart's
# frrk8s value is switched off.
resource "helm_release" "metallb" {
  # Zero-or-one instance toggle, keyed "enabled".
  for_each = var.metallb.enabled ? { enabled = true } : {}

  name       = "metallb"
  chart      = "metallb"
  repository = var.metallb.repository
  version    = var.metallb.version

  namespace        = var.metallb.namespace
  create_namespace = true

  wait    = true
  timeout = 600

  values = [
    yamlencode({
      frrk8s = { enabled = false }
    })
  ]

  depends_on = [null_resource.calico_ready]
}
# MetalLB IPAddressPool built from var.metallb.address_pool. Skipped when
# MetalLB is disabled or the address list is empty.
resource "kubernetes_manifest" "metallb_ip_address_pool" {
  for_each = (
    var.metallb.enabled && length(var.metallb.address_pool) > 0
  ) ? { enabled = true } : {}

  manifest = {
    apiVersion = "metallb.io/v1beta1"
    kind       = "IPAddressPool"

    metadata = {
      name      = var.metallb.pool_name
      namespace = var.metallb.namespace
    }

    spec = {
      addresses = var.metallb.address_pool
    }
  }

  depends_on = [helm_release.metallb]
}
# MetalLB L2Advertisement for the pool above; it shares the pool's name.
# Created only when MetalLB is enabled, L2 advertisement is enabled and the
# address list is non-empty.
resource "kubernetes_manifest" "metallb_l2_advertisement" {
  for_each = (
    var.metallb.enabled &&
    var.metallb.l2_advertisement_enabled &&
    length(var.metallb.address_pool) > 0
  ) ? { enabled = true } : {}

  manifest = {
    apiVersion = "metallb.io/v1beta1"
    kind       = "L2Advertisement"

    metadata = {
      name      = var.metallb.pool_name
      namespace = var.metallb.namespace
    }

    spec = {
      ipAddressPools = [var.metallb.pool_name]
    }
  }

  depends_on = [kubernetes_manifest.metallb_ip_address_pool]
}
# OpenEBS chart with the LVM, ZFS and Mayastor engines and the bundled
# loki/alloy values all set to disabled.
resource "helm_release" "openebs" {
  name       = "openebs"
  chart      = "openebs"
  repository = var.openebs.repository
  version    = var.openebs.version

  namespace        = var.openebs.namespace
  create_namespace = true
  timeout          = 600

  values = [
    yamlencode({
      engines = {
        local = {
          lvm = { enabled = false }
          zfs = { enabled = false }
        }
        replicated = {
          mayastor = { enabled = false }
        }
      }

      alloy = { enabled = false }
      loki  = { enabled = false }
    })
  ]

  depends_on = [null_resource.calico_ready]
}
# Non-default StorageClass for the openebs.io/local provisioner: hostpath
# storage under var.openebs.base_path, reclaim policy Retain, binding delayed
# until first consumer.
resource "kubernetes_storage_class_v1" "openebs_hostpath_retain" {
  metadata {
    name = var.openebs.retain_storage_class

    annotations = {
      "storageclass.kubernetes.io/is-default-class" = "false"
      "openebs.io/cas-type"                         = "local"

      # Provisioner settings, passed as a YAML list of name/value pairs.
      "cas.openebs.io/config" = yamlencode([
        { name = "StorageType", value = "hostpath" },
        { name = "BasePath", value = var.openebs.base_path },
      ])
    }
  }

  storage_provisioner    = "openebs.io/local"
  volume_binding_mode    = "WaitForFirstConsumer"
  reclaim_policy         = "Retain"
  allow_volume_expansion = true

  depends_on = [helm_release.openebs]
}
# Namespace for the observability releases (loki, mimir, promtail,
# prometheus-stack). Ordered after the Retain storage class.
resource "kubernetes_namespace_v1" "monitoring" {
  metadata {
    name = var.observability.namespace
  }

  depends_on = [kubernetes_storage_class_v1.openebs_hostpath_retain]
}
# Argo CD chart with no value overrides; ordered after the OpenEBS release.
resource "helm_release" "argocd" {
  name       = "argocd"
  chart      = "argo-cd"
  repository = var.argocd.repository
  version    = var.argocd.version

  namespace        = var.argocd.namespace
  create_namespace = true
  timeout          = 600

  depends_on = [helm_release.openebs]
}
# Readiness gate for Argo CD. After the chart is applied, blocks until the
# Application CRD is Established and the repo-server, server and
# application-controller workloads have finished rolling out.
resource "null_resource" "argocd_ready" {
depends_on = [helm_release.argocd]
# kubeconfig_path and namespace are read by the script through self.triggers;
# version is not referenced by the script, so it only serves to re-run the
# check when the chart version changes.
triggers = {
kubeconfig_path = var.kubeconfig_path
namespace = var.argocd.namespace
version = var.argocd.version
}
provisioner "local-exec" {
interpreter = ["/bin/bash", "-lc"]
# Timeouts: 180s for the CRD condition, 300s for each rollout. The script
# stops at the first failing command (set -e).
command = <<EOT
set -euo pipefail
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" wait --for=condition=Established --timeout=180s crd/applications.argoproj.io
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" rollout status deployment/argocd-repo-server --timeout=300s
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" rollout status deployment/argocd-server --timeout=300s
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" rollout status statefulset/argocd-application-controller --timeout=300s
EOT
}
}
# Registers the GitOps repository with Argo CD by creating (or updating) a
# Secret labelled argocd.argoproj.io/secret-type=repository.
#
#   * http:// and https:// URLs: the Secret carries only type and url.
#   * Any other URL is treated as SSH: the host key is scanned and merged
#     into the argocd-ssh-known-hosts-cm ConfigMap, and the Secret also
#     carries the private key read from var.gitops_ssh_key_path.
#
# Differences from a plain `ssh-keyscan -H <host>`:
#   * Host names are not hashed. Hashed entries use a random salt, so a
#     repeated scan of the same host never matches the stored line and
#     `sort -u` could not de-duplicate; the ConfigMap would grow on each run.
#   * For ssh://user@host:port/... URLs the port is passed to ssh-keyscan, so
#     the key of the actual git endpoint is recorded (as "[host]:port").
#     scp-style URLs (user@host:path) cannot carry a port and use the default.
resource "null_resource" "argocd_private_repo" {
  depends_on = [null_resource.argocd_ready]

  # All values are read by the script through self.triggers; changing any of
  # them replaces this resource and re-runs the script.
  triggers = {
    kubeconfig_path = var.kubeconfig_path
    namespace       = var.argocd.namespace
    secret_name     = var.argocd.repo_secret_name
    repo_url        = var.gitops_repo_url
    ssh_key_path    = var.gitops_ssh_key_path
  }

  provisioner "local-exec" {
    interpreter = ["/bin/bash", "-lc"]

    # In the heredoc, "$${" is the HCL escape for a literal shell "${".
    # The body is kept at column 0 so the rendered script has no stray indent.
    command = <<EOT
set -euo pipefail
repo_url="${self.triggers.repo_url}"
# HTTP(S) repositories need no SSH material: create the secret and stop.
case "$${repo_url}" in
http://*|https://*)
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" create secret generic "${self.triggers.secret_name}" \
--from-literal=type=git \
--from-literal=url="${self.triggers.repo_url}" \
--dry-run=client -o yaml | kubectl --kubeconfig "${self.triggers.kubeconfig_path}" apply -f -
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" label secret "${self.triggers.secret_name}" \
argocd.argoproj.io/secret-type=repository --overwrite
exit 0
;;
esac
# Reduce the URL to host[:port]: drop the scheme, the user part and the path.
repo_target="$${repo_url#ssh://}"
repo_target="$${repo_target#*@}"
repo_target="$${repo_target%%/*}"
repo_host="$${repo_target%%:*}"
# Only ssh:// URLs can carry a port; in scp-style URLs the colon starts the path.
repo_port=""
case "$${repo_url}" in
ssh://*)
case "$${repo_target}" in
*:*) repo_port="$${repo_target##*:}" ;;
esac
;;
esac
if [ -z "$${repo_host}" ]; then
echo "Could not determine GitOps SSH host from $${repo_url}" >&2
exit 1
fi
known_hosts_file="$(mktemp)"
known_hosts_sorted="$(mktemp)"
trap 'rm -f "$${known_hosts_file}" "$${known_hosts_sorted}"' EXIT
# Start from the entries Argo CD already has; a missing ConfigMap is fine.
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" get configmap argocd-ssh-known-hosts-cm \
-o jsonpath='{.data.ssh_known_hosts}' > "$${known_hosts_file}" 2>/dev/null || true
# Unhashed output keeps repeated scans byte-identical so sort -u can merge them.
if [ -n "$${repo_port}" ]; then
ssh-keyscan -p "$${repo_port}" "$${repo_host}" >> "$${known_hosts_file}" 2>/dev/null
else
ssh-keyscan "$${repo_host}" >> "$${known_hosts_file}" 2>/dev/null
fi
sort -u "$${known_hosts_file}" > "$${known_hosts_sorted}"
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" create configmap argocd-ssh-known-hosts-cm \
--from-file=ssh_known_hosts="$${known_hosts_sorted}" \
--dry-run=client -o yaml | kubectl --kubeconfig "${self.triggers.kubeconfig_path}" apply -f -
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" create secret generic "${self.triggers.secret_name}" \
--from-literal=type=git \
--from-literal=url="${self.triggers.repo_url}" \
--from-file=sshPrivateKey="${self.triggers.ssh_key_path}" \
--dry-run=client -o yaml | kubectl --kubeconfig "${self.triggers.kubeconfig_path}" apply -f -
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" label secret "${self.triggers.secret_name}" \
argocd.argoproj.io/secret-type=repository --overwrite
EOT
  }
}
# Kyverno chart with all four controllers at one replica each and explicit
# CPU/memory requests plus memory limits.
resource "helm_release" "kyverno" {
  name       = "kyverno"
  chart      = "kyverno"
  repository = var.kyverno.repository
  version    = var.kyverno.chart_version

  namespace        = var.kyverno.namespace
  create_namespace = true

  wait    = true
  timeout = 900

  values = [
    yamlencode({
      admissionController = {
        replicas = 1
        resources = {
          requests = { cpu = "50m", memory = "128Mi" }
          limits   = { memory = "384Mi" }
        }
      }

      backgroundController = {
        replicas = 1
        resources = {
          requests = { cpu = "25m", memory = "96Mi" }
          limits   = { memory = "256Mi" }
        }
      }

      cleanupController = {
        replicas = 1
        resources = {
          requests = { cpu = "10m", memory = "64Mi" }
          limits   = { memory = "192Mi" }
        }
      }

      reportsController = {
        replicas = 1
        resources = {
          requests = { cpu = "25m", memory = "96Mi" }
          limits   = { memory = "256Mi" }
        }
      }
    })
  ]

  depends_on = [null_resource.calico_ready]
}
# Kyverno policy chart, installed into the existing Kyverno namespace with
# the "baseline" pod security standard and validationFailureAction "Audit".
resource "helm_release" "kyverno_policies" {
  name       = "kyverno-policies"
  chart      = "kyverno-policies"
  repository = var.kyverno.repository
  version    = var.kyverno.policies_version

  # The namespace is created by helm_release.kyverno.
  namespace        = var.kyverno.namespace
  create_namespace = false

  wait    = true
  timeout = 600

  values = [
    yamlencode({
      podSecurityStandard               = "baseline"
      podSecuritySeverity               = "medium"
      validationFailureAction           = "Audit"
      validationAllowExistingViolations = true
      failurePolicy                     = "Ignore"
    })
  ]

  depends_on = [helm_release.kyverno]
}
# Loki in SingleBinary deployment mode: one replica, filesystem storage on a
# PVC from the Retain storage class, with the read/write/backend targets,
# gateway, caches, canary and chart test all turned off.
resource "helm_release" "loki" {
  name       = "loki"
  chart      = var.observability.loki.chart
  repository = var.observability.loki.repository
  version    = var.observability.loki.version

  # The namespace is managed by kubernetes_namespace_v1.monitoring.
  namespace        = var.observability.namespace
  create_namespace = false

  wait    = true
  timeout = 900

  values = [
    yamlencode({
      deploymentMode = "SingleBinary"

      loki = {
        auth_enabled = false

        commonConfig = {
          replication_factor = 1
        }

        storage = {
          type = "filesystem"
        }

        schemaConfig = {
          configs = [
            {
              from         = "2024-04-01"
              schema       = "v13"
              store        = "tsdb"
              object_store = "filesystem"
              index = {
                prefix = "loki_index_"
                period = "24h"
              }
            }
          ]
        }

        limits_config = {
          retention_period = var.observability.loki.retention_period
        }

        compactor = {
          retention_enabled    = true
          delete_request_store = "filesystem"
          working_directory    = "/var/loki/compactor"
        }
      }

      singleBinary = {
        replicas = 1
        affinity = {}

        persistence = {
          enabled                        = true
          storageClass                   = var.openebs.retain_storage_class
          size                           = var.observability.loki.storage_size
          whenScaled                     = "Retain"
          whenDeleted                    = "Retain"
          enableStatefulSetAutoDeletePVC = false
        }

        resources = {
          requests = { cpu = "50m", memory = "256Mi" }
          limits   = { memory = "768Mi" }
        }
      }

      # Scalable-mode targets are scaled to zero.
      read    = { replicas = 0 }
      write   = { replicas = 0 }
      backend = { replicas = 0 }

      # Optional chart components, all disabled.
      gateway      = { enabled = false }
      chunksCache  = { enabled = false }
      resultsCache = { enabled = false }
      lokiCanary   = { enabled = false }
      test         = { enabled = false }
    })
  ]

  depends_on = [kubernetes_namespace_v1.monitoring]
}
# Mimir with each listed component at one replica, multitenancy off,
# replication factor 1 and zone-aware replication disabled. Persistent
# volumes use the Retain storage class; the gateway and rollout_operator
# values are disabled.
resource "helm_release" "mimir" {
  name       = "mimir"
  chart      = var.observability.mimir.chart
  repository = var.observability.mimir.repository
  version    = var.observability.mimir.version

  # The namespace is managed by kubernetes_namespace_v1.monitoring.
  namespace        = var.observability.namespace
  create_namespace = false

  wait    = true
  timeout = 1200

  values = [
    yamlencode({
      mimir = {
        structuredConfig = {
          multitenancy_enabled = false
          ingester = {
            ring = {
              replication_factor = 1
            }
          }
        }
      }

      # Components with persistent volumes.
      alertmanager = {
        persistentVolume = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.alertmanager_storage_size
        }
        zoneAwareReplication = { enabled = false }
      }

      ingester = {
        replicas = 1
        persistentVolume = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.ingester_storage_size
        }
        resources = {
          requests = { cpu = "100m", memory = "512Mi" }
          limits   = { memory = "1Gi" }
        }
        zoneAwareReplication = { enabled = false }
      }

      store_gateway = {
        replicas = 1
        persistentVolume = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.store_gateway_storage_size
        }
        zoneAwareReplication = { enabled = false }
      }

      compactor = {
        replicas = 1
        persistentVolume = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.compactor_storage_size
        }
      }

      minio = {
        persistence = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.minio_storage_size
        }
        resources = {
          requests = { cpu = "50m", memory = "128Mi" }
          limits   = { memory = "512Mi" }
        }
      }

      # Components that only set a replica count.
      distributor     = { replicas = 1 }
      querier         = { replicas = 1 }
      query_frontend  = { replicas = 1 }
      query_scheduler = { replicas = 1 }
      ruler           = { replicas = 1 }
      nginx           = { replicas = 1 }

      # Disabled components.
      gateway          = { enabled = false }
      rollout_operator = { enabled = false }
    })
  ]

  depends_on = [kubernetes_namespace_v1.monitoring]
}
# Promtail, configured with a single client that pushes to the in-cluster
# Loki service on port 3100.
resource "helm_release" "promtail" {
  name       = "promtail"
  chart      = var.observability.promtail.chart
  repository = var.observability.promtail.repository
  version    = var.observability.promtail.version

  namespace        = var.observability.namespace
  create_namespace = false

  wait    = true
  timeout = 600

  values = [
    yamlencode({
      config = {
        clients = [
          {
            url = "http://loki.${var.observability.namespace}.svc:3100/loki/api/v1/push"
          }
        ]
      }

      resources = {
        requests = { cpu = "25m", memory = "64Mi" }
        limits   = { memory = "128Mi" }
      }
    })
  ]

  depends_on = [helm_release.loki]
}
# Prometheus stack release: operator, Alertmanager, Prometheus and Grafana,
# with persistent storage on the Retain storage class. Prometheus can
# remote-write to Mimir, and Grafana gets Loki and Mimir as extra
# datasources.
resource "helm_release" "prometheus_stack" {
  name       = "prometheus-stack"
  chart      = var.observability.prometheus.chart
  repository = var.observability.prometheus.repository
  version    = var.observability.prometheus.version

  # The namespace is managed by kubernetes_namespace_v1.monitoring.
  namespace        = var.observability.namespace
  create_namespace = false

  wait    = true
  timeout = 1200

  values = [
    yamlencode({
      # Control-plane component monitors, all disabled.
      kubeControllerManager = { enabled = false }
      kubeEtcd              = { enabled = false }
      kubeProxy             = { enabled = false }
      kubeScheduler         = { enabled = false }

      prometheusOperator = {
        tls               = { enabled = false }
        admissionWebhooks = { enabled = false }

        resources = {
          requests = { cpu = "50m", memory = "128Mi" }
          limits   = { memory = "384Mi" }
        }
      }

      alertmanager = {
        alertmanagerSpec = {
          storage = {
            volumeClaimTemplate = {
              spec = {
                storageClassName = var.openebs.retain_storage_class
                accessModes      = ["ReadWriteOnce"]
                resources = {
                  requests = {
                    storage = var.observability.prometheus.alertmanager_storage_size
                  }
                }
              }
            }
          }
        }
      }

      prometheus = {
        prometheusSpec = {
          retention = var.observability.prometheus.retention

          resources = {
            requests = { cpu = "100m", memory = "512Mi" }
            limits   = { memory = "1Gi" }
          }

          # Empty list unless remote_write_mimir_enabled is set.
          remoteWrite = var.observability.prometheus.remote_write_mimir_enabled ? [
            {
              url = "http://mimir-nginx.${var.observability.namespace}.svc/api/v1/push"
            }
          ] : []

          storageSpec = {
            volumeClaimTemplate = {
              spec = {
                storageClassName = var.openebs.retain_storage_class
                accessModes      = ["ReadWriteOnce"]
                resources = {
                  requests = {
                    storage = var.observability.prometheus.storage_size
                  }
                }
              }
            }
          }
        }
      }

      grafana = {
        persistence = {
          enabled          = true
          type             = "sts"
          storageClassName = var.openebs.retain_storage_class
          accessModes      = ["ReadWriteOnce"]
          size             = var.observability.prometheus.grafana_storage_size
        }

        # Neither extra datasource is marked as the default.
        additionalDataSources = [
          {
            name      = "Loki"
            type      = "loki"
            access    = "proxy"
            url       = "http://loki.${var.observability.namespace}.svc:3100"
            isDefault = false
          },
          {
            name      = "Mimir"
            type      = "prometheus"
            access    = "proxy"
            url       = "http://mimir-nginx.${var.observability.namespace}.svc/prometheus"
            isDefault = false
          }
        ]

        resources = {
          requests = { cpu = "50m", memory = "128Mi" }
          limits   = { memory = "384Mi" }
        }
      }
    })
  ]

  depends_on = [helm_release.loki, helm_release.mimir]
}
# One Helm release per entry of var.extra_helm_releases; the map key becomes
# the release name.
resource "helm_release" "extra_tools" {
  for_each = var.extra_helm_releases

  name       = each.key
  chart      = each.value.chart
  repository = each.value.repository

  # An empty version string is passed to the provider as null (unset).
  version = each.value.version != "" ? each.value.version : null

  namespace        = each.value.namespace
  create_namespace = each.value.create_namespace
  timeout          = each.value.timeout

  # An empty values_yaml string yields no values document at all.
  values = each.value.values_yaml != "" ? [each.value.values_yaml] : []

  # One `set` block per key/value pair in set_values.
  dynamic "set" {
    for_each = each.value.set_values

    content {
      name  = set.key
      value = set.value
    }
  }

  depends_on = [null_resource.calico_ready]
}