Add observability platform stack

This commit is contained in:
juvdiaz 2026-05-25 21:50:53 -06:00
parent de4e9854e7
commit 009e6ce927
2 changed files with 446 additions and 1 deletions

View File

@ -231,6 +231,14 @@ resource "kubernetes_storage_class_v1" "openebs_hostpath_retain" {
allow_volume_expansion = true allow_volume_expansion = true
} }
# Namespace shared by the observability Helm releases; its name comes from
# var.observability.namespace.
resource "kubernetes_namespace_v1" "monitoring" {
  metadata {
    name = var.observability.namespace
  }

  # Ordered after the OpenEBS "retain" storage class resource.
  depends_on = [kubernetes_storage_class_v1.openebs_hostpath_retain]
}
resource "helm_release" "argocd" { resource "helm_release" "argocd" {
depends_on = [helm_release.openebs] depends_on = [helm_release.openebs]
name = "argocd" name = "argocd"
@ -328,6 +336,370 @@ EOT
} }
} }
# Loki log store, installed in "SingleBinary" deployment mode with
# filesystem-backed storage on a persistent volume.
resource "helm_release" "loki" {
  name             = "loki"
  namespace        = var.observability.namespace
  create_namespace = false # the namespace is owned by kubernetes_namespace_v1.monitoring

  # Chart coordinates are fully driven by var.observability.loki.
  repository = var.observability.loki.repository
  chart      = var.observability.loki.chart
  version    = var.observability.loki.version

  # Block until the release reports ready, for at most 900 seconds.
  wait    = true
  timeout = 900

  values = [
    yamlencode({
      deploymentMode = "SingleBinary"

      # Loki server configuration.
      loki = {
        auth_enabled = false

        commonConfig = {
          replication_factor = 1
        }

        storage = {
          type = "filesystem"
        }

        schemaConfig = {
          configs = [
            {
              from         = "2024-04-01"
              store        = "tsdb"
              object_store = "filesystem"
              schema       = "v13"
              index = {
                prefix = "loki_index_"
                period = "24h"
              }
            }
          ]
        }

        # Retention: the period is set in limits_config and retention is
        # switched on in the compactor section below.
        limits_config = {
          retention_period = var.observability.loki.retention_period
        }

        compactor = {
          retention_enabled    = true
          delete_request_store = "filesystem"
          working_directory    = "/var/loki/compactor"
        }
      }

      # The one and only Loki workload in this mode.
      singleBinary = {
        replicas = 1
        affinity = {}

        # Volume claims use the "retain" storage class and are configured to
        # be kept on scale-down and on deletion.
        persistence = {
          enabled                        = true
          storageClass                   = var.openebs.retain_storage_class
          size                           = var.observability.loki.storage_size
          enableStatefulSetAutoDeletePVC = false
          whenScaled                     = "Retain"
          whenDeleted                    = "Retain"
        }

        resources = {
          requests = {
            cpu    = "50m"
            memory = "256Mi"
          }
          limits = {
            memory = "768Mi"
          }
        }
      }

      # The read/write/backend targets are scaled to zero.
      read = {
        replicas = 0
      }
      write = {
        replicas = 0
      }
      backend = {
        replicas = 0
      }

      # Optional chart components that are switched off.
      gateway = {
        enabled = false
      }
      chunksCache = {
        enabled = false
      }
      resultsCache = {
        enabled = false
      }
      lokiCanary = {
        enabled = false
      }
      test = {
        enabled = false
      }
    })
  ]

  depends_on = [kubernetes_namespace_v1.monitoring]
}
# Mimir metrics store, installed with one replica per component,
# multitenancy disabled and zone-aware replication disabled.
# Persistent volumes use the OpenEBS "retain" storage class.
resource "helm_release" "mimir" {
  name             = "mimir"
  namespace        = var.observability.namespace
  create_namespace = false # the namespace is owned by kubernetes_namespace_v1.monitoring

  # Chart coordinates are fully driven by var.observability.mimir.
  repository = var.observability.mimir.repository
  chart      = var.observability.mimir.chart
  version    = var.observability.mimir.version

  # Block until the release reports ready, for at most 1200 seconds.
  wait    = true
  timeout = 1200

  values = [
    yamlencode({
      # Mimir server configuration overrides.
      mimir = {
        structuredConfig = {
          multitenancy_enabled = false
          ingester = {
            ring = {
              replication_factor = 1
            }
          }
        }
      }

      # --- Components with persistent volumes ---------------------------
      alertmanager = {
        persistentVolume = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.alertmanager_storage_size
        }
        zoneAwareReplication = {
          enabled = false
        }
      }

      ingester = {
        replicas = 1
        persistentVolume = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.ingester_storage_size
        }
        resources = {
          requests = {
            cpu    = "100m"
            memory = "512Mi"
          }
          limits = {
            memory = "1Gi"
          }
        }
        zoneAwareReplication = {
          enabled = false
        }
      }

      store_gateway = {
        replicas = 1
        persistentVolume = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.store_gateway_storage_size
        }
        zoneAwareReplication = {
          enabled = false
        }
      }

      compactor = {
        replicas = 1
        persistentVolume = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.compactor_storage_size
        }
      }

      # --- Components without persistent volumes ------------------------
      distributor = {
        replicas = 1
      }
      querier = {
        replicas = 1
      }
      query_frontend = {
        replicas = 1
      }
      query_scheduler = {
        replicas = 1
      }
      ruler = {
        replicas = 1
      }

      # MinIO settings (volume size/class and resources).
      minio = {
        persistence = {
          storageClass = var.openebs.retain_storage_class
          size         = var.observability.mimir.minio_storage_size
        }
        resources = {
          requests = {
            cpu    = "50m"
            memory = "128Mi"
          }
          limits = {
            memory = "512Mi"
          }
        }
      }

      # Other resources in this module address Mimir through the
      # "mimir-nginx" service, so nginx stays on.
      nginx = {
        replicas = 1
      }

      # The mimir-distributed chart switches the non-enterprise gateway with
      # `enabledNonEnterprise`; a plain `gateway.enabled` key is not read by
      # the chart and was therefore inert. Keep the gateway explicitly off so
      # nginx above remains the entry point.
      gateway = {
        enabledNonEnterprise = false
      }

      rollout_operator = {
        enabled = false
      }
    })
  ]

  depends_on = [kubernetes_namespace_v1.monitoring]
}
# Promtail release, configured with a single client that pushes to the
# Loki release in the same namespace.
resource "helm_release" "promtail" {
  name             = "promtail"
  namespace        = var.observability.namespace
  create_namespace = false # the namespace already exists by the time Loki is installed

  # Chart coordinates are fully driven by var.observability.promtail.
  repository = var.observability.promtail.repository
  chart      = var.observability.promtail.chart
  version    = var.observability.promtail.version

  # Block until the release reports ready, for at most 600 seconds.
  wait    = true
  timeout = 600

  values = [
    yamlencode({
      config = {
        # Push endpoint of the "loki" service on port 3100.
        clients = [
          {
            url = "http://loki.${var.observability.namespace}.svc:3100/loki/api/v1/push"
          }
        ]
      }

      resources = {
        requests = {
          cpu    = "25m"
          memory = "64Mi"
        }
        limits = {
          memory = "128Mi"
        }
      }
    })
  ]

  # Installed only after Loki.
  depends_on = [helm_release.loki]
}
# kube-prometheus-stack release: Prometheus Operator, Prometheus,
# Alertmanager and Grafana. Prometheus optionally remote-writes to Mimir, and
# Grafana gets Loki and Mimir as additional data sources.
resource "helm_release" "prometheus_stack" {
  name             = "prometheus-stack"
  namespace        = var.observability.namespace
  create_namespace = false # the namespace is created before Loki/Mimir, which this depends on

  # Chart coordinates are fully driven by var.observability.prometheus.
  repository = var.observability.prometheus.repository
  chart      = var.observability.prometheus.chart
  version    = var.observability.prometheus.version

  # Block until the release reports ready, for at most 1200 seconds.
  wait    = true
  timeout = 1200

  values = [
    yamlencode({
      # Control-plane scrape targets that are switched off.
      kubeControllerManager = {
        enabled = false
      }
      kubeEtcd = {
        enabled = false
      }
      kubeProxy = {
        enabled = false
      }
      kubeScheduler = {
        enabled = false
      }

      prometheusOperator = {
        admissionWebhooks = {
          enabled = false
        }
        # With the admission webhooks off nothing creates the "-admission"
        # certificate secret that the operator pod mounts while TLS is on,
        # so the pod never starts and `wait = true` times out. TLS therefore
        # has to be disabled together with the webhooks.
        tls = {
          enabled = false
        }
        resources = {
          requests = {
            cpu    = "50m"
            memory = "128Mi"
          }
          limits = {
            memory = "384Mi"
          }
        }
      }

      alertmanager = {
        alertmanagerSpec = {
          storage = {
            volumeClaimTemplate = {
              spec = {
                storageClassName = var.openebs.retain_storage_class
                accessModes      = ["ReadWriteOnce"]
                resources = {
                  requests = {
                    storage = var.observability.prometheus.alertmanager_storage_size
                  }
                }
              }
            }
          }
        }
      }

      prometheus = {
        prometheusSpec = {
          retention = var.observability.prometheus.retention

          resources = {
            requests = {
              cpu    = "100m"
              memory = "512Mi"
            }
            limits = {
              memory = "1Gi"
            }
          }

          # Ship samples to Mimir through its nginx service only when the
          # toggle is on; otherwise no remote-write targets are configured.
          remoteWrite = var.observability.prometheus.remote_write_mimir_enabled ? [
            {
              url = "http://mimir-nginx.${var.observability.namespace}.svc/api/v1/push"
            }
          ] : []

          storageSpec = {
            volumeClaimTemplate = {
              spec = {
                storageClassName = var.openebs.retain_storage_class
                accessModes      = ["ReadWriteOnce"]
                resources = {
                  requests = {
                    storage = var.observability.prometheus.storage_size
                  }
                }
              }
            }
          }
        }
      }

      grafana = {
        persistence = {
          enabled          = true
          type             = "sts"
          storageClassName = var.openebs.retain_storage_class
          accessModes      = ["ReadWriteOnce"]
          size             = var.observability.prometheus.grafana_storage_size
        }

        # Extra data sources pointing at the Loki and Mimir releases in the
        # same namespace; neither is marked as the default.
        additionalDataSources = [
          {
            name      = "Loki"
            type      = "loki"
            access    = "proxy"
            url       = "http://loki.${var.observability.namespace}.svc:3100"
            isDefault = false
          },
          {
            name      = "Mimir"
            type      = "prometheus"
            access    = "proxy"
            url       = "http://mimir-nginx.${var.observability.namespace}.svc/prometheus"
            isDefault = false
          }
        ]

        resources = {
          requests = {
            cpu    = "50m"
            memory = "128Mi"
          }
          limits = {
            memory = "384Mi"
          }
        }
      }
    })
  ]

  # Installed after both backends referenced above.
  depends_on = [helm_release.loki, helm_release.mimir]
}
resource "helm_release" "extra_tools" { resource "helm_release" "extra_tools" {
for_each = var.extra_helm_releases for_each = var.extra_helm_releases

View File

@ -76,6 +76,80 @@ variable "argocd" {
} }
} }
# Settings for the observability stack: the target namespace plus, per Helm
# release, the chart coordinates (repository / chart / version) and sizing.
# Every attribute is required when the default is overridden.
variable "observability" {
  type = object({
    # Namespace all observability releases are installed into.
    namespace = string

    # kube-prometheus-stack (Prometheus, Alertmanager, Grafana).
    prometheus = object({
      repository                 = string
      chart                      = string
      version                    = string
      retention                  = string
      storage_size               = string
      alertmanager_storage_size  = string
      grafana_storage_size       = string
      remote_write_mimir_enabled = bool # true => Prometheus remote-writes to Mimir
    })

    # Loki.
    loki = object({
      repository       = string
      chart            = string
      version          = string
      storage_size     = string
      retention_period = string
    })

    # Promtail.
    promtail = object({
      repository = string
      chart      = string
      version    = string
    })

    # Mimir, with one volume size per stateful component.
    mimir = object({
      repository                 = string
      chart                      = string
      version                    = string
      minio_storage_size         = string
      alertmanager_storage_size  = string
      ingester_storage_size      = string
      store_gateway_storage_size = string
      compactor_storage_size     = string
    })
  })

  default = {
    namespace = "monitoring"

    prometheus = {
      repository                 = "https://prometheus-community.github.io/helm-charts"
      chart                      = "kube-prometheus-stack"
      version                    = "85.3.3"
      retention                  = "7d"
      storage_size               = "15Gi"
      alertmanager_storage_size  = "1Gi"
      grafana_storage_size       = "2Gi"
      remote_write_mimir_enabled = true
    }

    loki = {
      repository       = "https://grafana.github.io/helm-charts"
      chart            = "loki"
      version          = "7.0.0"
      storage_size     = "10Gi"
      retention_period = "168h" # 7 days
    }

    promtail = {
      repository = "https://grafana.github.io/helm-charts"
      chart      = "promtail"
      version    = "6.17.1"
    }

    mimir = {
      repository                 = "https://grafana.github.io/helm-charts"
      chart                      = "mimir-distributed"
      version                    = "5.8.0"
      minio_storage_size         = "10Gi"
      alertmanager_storage_size  = "1Gi"
      ingester_storage_size      = "4Gi"
      store_gateway_storage_size = "4Gi"
      compactor_storage_size     = "4Gi"
    }
  }
}
variable "extra_helm_releases" { variable "extra_helm_releases" {
type = map(object({ type = map(object({
repository = string repository = string
@ -90,4 +164,3 @@ variable "extra_helm_releases" {
default = {} default = {}
} }