Compare commits
2 Commits
072bcfdbc8
...
89fa756305
| Author | SHA1 | Date |
|---|---|---|
|
|
89fa756305 | |
|
|
b0a2c44bbb |
11
README.md
11
README.md
|
|
@ -210,6 +210,8 @@ duplicate those PV manifests when you want storage on another node.
|
||||||
|
|
||||||
`bootstrap/cluster` labels nodes with homelab placement metadata:
|
`bootstrap/cluster` labels nodes with homelab placement metadata:
|
||||||
|
|
||||||
|
- `node-role.kubernetes.io/worker=worker` on every worker so `kubectl get nodes`
|
||||||
|
shows `worker` instead of `<none>` in the ROLES column
|
||||||
- `homelab.dev/node-role=control-plane` and `homelab.dev/storage=local` on the
|
- `homelab.dev/node-role=control-plane` and `homelab.dev/storage=local` on the
|
||||||
Debian control plane
|
Debian control plane
|
||||||
- `homelab.dev/node-role=edge-app` and `homelab.dev/storage=local` on the
|
- `homelab.dev/node-role=edge-app` and `homelab.dev/storage=local` on the
|
||||||
|
|
@ -225,6 +227,15 @@ OpenEBS hostpath PVs are node-local. Move workloads only after their storage and
|
||||||
edge path are ready on the target node. Gitea is outside Kubernetes and is moved
|
edge path are ready on the target node. Gitea is outside Kubernetes and is moved
|
||||||
by changing the Raspberry Pi Docker install target instead.
|
by changing the Raspberry Pi Docker install target instead.
|
||||||
|
|
||||||
|
The Prometheus stack control workloads are pinned to Pimox worker nodes by the
|
||||||
|
default `prometheus_stack_node_selector` (`homelab.dev/node-role=app` and
|
||||||
|
`homelab.dev/storage=nvme`). Because the Prometheus, Alertmanager, and Grafana
|
||||||
|
PVCs use retained local OpenEBS volumes, moving an existing install off the
|
||||||
|
Debian control plane requires discarding those PVCs. Run
|
||||||
|
`./lab.sh move-prometheus-stack-workers` from the Debian host to label existing
|
||||||
|
worker nodes, destroy only the existing `prometheus-stack` Helm release, delete
|
||||||
|
its retained PVC/PV objects, and recreate the stack on the worker selector.
|
||||||
|
|
||||||
The website and demos NodePorts are reachable from the OCI jump box through the
|
The website and demos NodePorts are reachable from the OCI jump box through the
|
||||||
Raspberry Pi Tailscale interface. `bootstrap/cluster` installs a persistent
|
Raspberry Pi Tailscale interface. `bootstrap/cluster` installs a persistent
|
||||||
`homelab-tailscale-nodeport.service` on the configured worker to restore the
|
`homelab-tailscale-nodeport.service` on the configured worker to restore the
|
||||||
|
|
|
||||||
|
|
@ -104,6 +104,8 @@ EOT
|
||||||
local.metallb_ip_address_pool_manifest,
|
local.metallb_ip_address_pool_manifest,
|
||||||
var.metallb.l2_advertisement_enabled ? local.metallb_l2_advertisement_manifest : "",
|
var.metallb.l2_advertisement_enabled ? local.metallb_l2_advertisement_manifest : "",
|
||||||
]))
|
]))
|
||||||
|
|
||||||
|
prometheus_stack_node_selector = var.prometheus_stack_node_selector
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "helm_release" "calico_crds" {
|
resource "helm_release" "calico_crds" {
|
||||||
|
|
@ -1102,6 +1104,7 @@ resource "helm_release" "prometheus_stack" {
|
||||||
enabled = false
|
enabled = false
|
||||||
}
|
}
|
||||||
prometheusOperator = {
|
prometheusOperator = {
|
||||||
|
nodeSelector = local.prometheus_stack_node_selector
|
||||||
tls = {
|
tls = {
|
||||||
enabled = false
|
enabled = false
|
||||||
}
|
}
|
||||||
|
|
@ -1120,6 +1123,7 @@ resource "helm_release" "prometheus_stack" {
|
||||||
}
|
}
|
||||||
alertmanager = {
|
alertmanager = {
|
||||||
alertmanagerSpec = {
|
alertmanagerSpec = {
|
||||||
|
nodeSelector = local.prometheus_stack_node_selector
|
||||||
storage = {
|
storage = {
|
||||||
volumeClaimTemplate = {
|
volumeClaimTemplate = {
|
||||||
spec = {
|
spec = {
|
||||||
|
|
@ -1137,7 +1141,8 @@ resource "helm_release" "prometheus_stack" {
|
||||||
}
|
}
|
||||||
prometheus = {
|
prometheus = {
|
||||||
prometheusSpec = {
|
prometheusSpec = {
|
||||||
retention = var.observability.prometheus.retention
|
nodeSelector = local.prometheus_stack_node_selector
|
||||||
|
retention = var.observability.prometheus.retention
|
||||||
resources = {
|
resources = {
|
||||||
requests = {
|
requests = {
|
||||||
cpu = "100m"
|
cpu = "100m"
|
||||||
|
|
@ -1168,6 +1173,7 @@ resource "helm_release" "prometheus_stack" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
grafana = {
|
grafana = {
|
||||||
|
nodeSelector = local.prometheus_stack_node_selector
|
||||||
persistence = {
|
persistence = {
|
||||||
enabled = true
|
enabled = true
|
||||||
type = "sts"
|
type = "sts"
|
||||||
|
|
@ -1205,6 +1211,9 @@ resource "helm_release" "prometheus_stack" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
"kube-state-metrics" = {
|
||||||
|
nodeSelector = local.prometheus_stack_node_selector
|
||||||
|
}
|
||||||
})
|
})
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -210,6 +210,15 @@ variable "observability" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Node labels a node must carry for the kube-prometheus-stack workloads that
# consume this selector to be scheduled on it. The default requires both
# homelab.dev/node-role=app and homelab.dev/storage=nvme.
variable "prometheus_stack_node_selector" {
  type        = map(string)
  description = "Node selector applied to kube-prometheus-stack control workloads so they stay off the control plane."

  default = {
    "homelab.dev/node-role" = "app"
    "homelab.dev/storage"   = "nvme"
  }
}
|
||||||
|
|
||||||
variable "extra_helm_releases" {
|
variable "extra_helm_releases" {
|
||||||
type = map(object({
|
type = map(object({
|
||||||
repository = string
|
repository = string
|
||||||
|
|
|
||||||
96
lab.sh
96
lab.sh
|
|
@ -174,6 +174,74 @@ adopt_apps_existing_resources() {
|
||||||
"demos-static"
|
"demos-static"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Label every existing Kubernetes node with its homelab placement metadata and
# verify that at least one node can host the prometheus-stack workloads.
#
# Environment:
#   KUBECONFIG_PATH              kubeconfig passed to every kubectl call.
#   LAB_CONTROL_PLANE_NODE_NAME  control-plane node name (default: debian).
#   LAB_RASPBERRY_NODE_NAME      Raspberry Pi node name (default: raspberry).
#
# Labels applied (always with --overwrite):
#   control plane   homelab.dev/node-role=control-plane, homelab.dev/storage=local
#   pimox-worker-*  node-role.kubernetes.io/worker=worker,
#                   homelab.dev/node-role=app, homelab.dev/storage=nvme
#   raspberry       node-role.kubernetes.io/worker=worker,
#                   homelab.dev/node-role=edge-app, homelab.dev/storage=local
#   any other node  node-role.kubernetes.io/worker=worker only
#
# Exits the whole script with status 1 when the node list cannot be read or
# when no node matches the prometheus-stack selector.
ensure_homelab_node_labels() {
  local control_plane_node="${LAB_CONTROL_PLANE_NODE_NAME:-debian}"
  local raspberry_node="${LAB_RASPBERRY_NODE_NAME:-raspberry}"
  local prometheus_selector="homelab.dev/node-role=app,homelab.dev/storage=nvme"
  local node
  local node_names
  local target_nodes
  local -a labels

  echo "Applying homelab labels to existing Kubernetes nodes..."

  # Capture the node list up front: reading it through a process substitution
  # would hide a kubectl failure and silently label nothing.
  # `exit` rather than `return`: move_prometheus_stack_workers calls this
  # function without checking its status and destroys data right afterwards.
  if ! node_names="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get nodes \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"; then
    echo "Failed to list Kubernetes nodes; refusing to move prometheus-stack." >&2
    exit 1
  fi

  while IFS= read -r node; do
    [[ -n "${node}" ]] || continue

    if [[ "${node}" == "${control_plane_node}" ]]; then
      # The control plane never receives the worker role label.
      labels=(
        homelab.dev/node-role=control-plane
        homelab.dev/storage=local
      )
    else
      labels=(node-role.kubernetes.io/worker=worker)
      if [[ "${node}" == pimox-worker-* ]]; then
        labels+=(homelab.dev/node-role=app homelab.dev/storage=nvme)
      elif [[ "${node}" == "${raspberry_node}" ]]; then
        labels+=(homelab.dev/node-role=edge-app homelab.dev/storage=local)
      fi
    fi

    # One API call per node applies all of its labels together.
    kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \
      "${labels[@]}" \
      --overwrite
  done <<<"${node_names}"

  if ! target_nodes="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get nodes \
    -l "${prometheus_selector}" -o name)"; then
    echo "Failed to query nodes matching ${prometheus_selector}; refusing to move prometheus-stack." >&2
    exit 1
  fi

  if [[ -z "${target_nodes}" ]]; then
    echo "No nodes match ${prometheus_selector}; refusing to move prometheus-stack." >&2
    exit 1
  fi
}
|
||||||
|
|
||||||
|
# Delete the PVCs and PV objects that belong to the prometheus-stack release
# (Prometheus, Alertmanager and Grafana) so the stack can be recreated
# elsewhere. All data stored on those volumes is discarded.
#
# Arguments:
#   $1  namespace holding the PVCs (default: monitoring).
# Environment:
#   KUBECONFIG_PATH  kubeconfig passed to every kubectl call.
#
# Exits the whole script with status 1 when the PVC or PV listing fails, so
# the caller cannot go on to re-apply the stack against stale storage objects.
delete_prometheus_stack_storage() {
  local namespace="${1:-monitoring}"
  local pattern='(^|-)prometheus-stack-(prometheus|alertmanager|grafana)(-|$)|^prometheus-prometheus-stack|^alertmanager-prometheus-stack|^storage-prometheus-stack-grafana'
  # Emits "<pv name>\t<claim name>" for every PV whose claimRef is in ${namespace}.
  local pv_query='{range .items[?(@.spec.claimRef.namespace=="'"${namespace}"'")]}{.metadata.name}{"\t"}{.spec.claimRef.name}{"\n"}{end}'
  local pvc_listing
  local pv_listing
  local -a pvc_names=()
  local -a pv_names=()

  # List first and check the status: piping kubectl straight into awk with
  # stderr discarded would turn an API failure into "nothing to delete".
  if ! pvc_listing="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get pvc -o name)"; then
    echo "Failed to list PVCs in ${namespace}; refusing to continue with stale prometheus-stack storage." >&2
    exit 1
  fi

  # PVs are collected before the PVCs are deleted, while claimRef still
  # identifies which volumes belonged to the stack.
  if ! pv_listing="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get pv -o jsonpath="${pv_query}")"; then
    echo "Failed to list PVs claimed from ${namespace}; refusing to continue with stale prometheus-stack storage." >&2
    exit 1
  fi

  # PVC lines look like "persistentvolumeclaim/<name>"; keep the bare name.
  mapfile -t pvc_names < <(awk -F/ -v pattern="${pattern}" '$2 ~ pattern {print $2}' <<<"${pvc_listing}")
  mapfile -t pv_names < <(awk -v pattern="${pattern}" '$2 ~ pattern {print $1}' <<<"${pv_listing}")

  if (( ${#pvc_names[@]} > 0 )); then
    echo "Deleting old prometheus-stack PVCs in ${namespace}; saved Prometheus, Alertmanager, and Grafana data will be discarded..."
    kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" delete pvc \
      --wait=true --timeout=180s --ignore-not-found "${pvc_names[@]}"
  fi

  if (( ${#pv_names[@]} > 0 )); then
    echo "Deleting old prometheus-stack retained PV objects..."
    # --ignore-not-found: a PV may already be gone once its PVC was removed
    # (for example with a Delete reclaim policy); that is not an error here.
    kubectl --kubeconfig "${KUBECONFIG_PATH}" delete pv \
      --wait=false --ignore-not-found "${pv_names[@]}"
  fi
}
|
||||||
|
|
||||||
run_tofu_stack() {
|
run_tofu_stack() {
|
||||||
local stack="$1"
|
local stack="$1"
|
||||||
local -a apply_args=(-auto-approve)
|
local -a apply_args=(-auto-approve)
|
||||||
|
|
@ -192,6 +260,25 @@ run_tofu_stack() {
|
||||||
tofu -chdir="${REPO_ROOT}/${stack}" apply "${apply_args[@]}"
|
tofu -chdir="${REPO_ROOT}/${stack}" apply "${apply_args[@]}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Recreate the prometheus-stack Helm release on the worker node selector.
#
# DESTRUCTIVE: the existing release is destroyed and its PVC/PV objects are
# deleted before the platform stack is re-applied, so stored Prometheus,
# Alertmanager and Grafana data is lost.
#
# Environment:
#   LAB_MONITORING_NAMESPACE  namespace of the stack (default: monitoring).
#   TF_VAR_kubeconfig_path    kubeconfig handed to tofu; defaults to KUBECONFIG_PATH.
#   KUBECONFIG_PATH, REPO_ROOT are read from the surrounding script.
move_prometheus_stack_workers() {
  local namespace="${LAB_MONITORING_NAMESPACE:-monitoring}"
  local stack="bootstrap/platform"
  local -a platform_tofu

  # NOTE(review): presumably aborts unless running on the Debian host — confirm
  # against require_debian_server.
  require_debian_server "move-prometheus-stack-workers"

  export TF_VAR_kubeconfig_path="${TF_VAR_kubeconfig_path:-${KUBECONFIG_PATH}}"
  export KUBECONFIG="${TF_VAR_kubeconfig_path}"

  echo "Moving prometheus-stack off the control plane. Existing prometheus-stack PVC data will be deleted."

  # Label nodes first; this step exits the script if no node matches the
  # selector, i.e. before anything is destroyed.
  ensure_homelab_node_labels

  # Every tofu call below targets the same stack directory.
  platform_tofu=(tofu -chdir="${REPO_ROOT}/${stack}")

  "${platform_tofu[@]}" init
  # NOTE(review): presumably imports already-existing resources into the tofu
  # state — confirm against adopt_platform_existing_resources.
  adopt_platform_existing_resources

  # The order matters: the release must be gone before its storage is removed,
  # and the storage must be gone before the stack is recreated.
  "${platform_tofu[@]}" destroy -target=helm_release.prometheus_stack -auto-approve
  delete_prometheus_stack_storage "${namespace}"
  "${platform_tofu[@]}" apply -auto-approve

  # Show where the recreated pods were scheduled.
  kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get pods -o wide
}
|
||||||
|
|
||||||
truthy() {
|
truthy() {
|
||||||
case "${1,,}" in
|
case "${1,,}" in
|
||||||
1 | true | yes | on)
|
1 | true | yes | on)
|
||||||
|
|
@ -604,8 +691,8 @@ write_cluster_worker_var_file() {
|
||||||
LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \
|
LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \
|
||||||
LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \
|
LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \
|
||||||
LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \
|
LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \
|
||||||
LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\"}}" \
|
LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\"}}" \
|
||||||
LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\"}}" \
|
LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\"}}" \
|
||||||
python3 - "${spec_file}" "${var_file}" <<'PY'
|
python3 - "${spec_file}" "${var_file}" <<'PY'
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
@ -2761,6 +2848,9 @@ case "${1:-}" in
|
||||||
install-gitea-runner)
|
install-gitea-runner)
|
||||||
install_gitea_runner "${2:-}"
|
install_gitea_runner "${2:-}"
|
||||||
;;
|
;;
|
||||||
|
move-prometheus-stack-workers)
|
||||||
|
move_prometheus_stack_workers
|
||||||
|
;;
|
||||||
openwrt)
|
openwrt)
|
||||||
openwrt
|
openwrt
|
||||||
;;
|
;;
|
||||||
|
|
@ -2768,7 +2858,7 @@ case "${1:-}" in
|
||||||
nuke
|
nuke
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "Usage: $0 {up|rebuild-cluster|apps|deploy-gitea|bootstrap-gitea-repo|backup-gitea|drill-gitea-restore|install-gitea-runner|openwrt|nuke}"
|
echo "Usage: $0 {up|rebuild-cluster|apps|deploy-gitea|bootstrap-gitea-repo|backup-gitea|drill-gitea-restore|install-gitea-runner|move-prometheus-stack-workers|openwrt|nuke}"
|
||||||
exit 1
|
exit 1
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue