Compare commits
No commits in common. "89fa7563058fe3e05d298ff0275c5ec7d5c82b7b" and "072bcfdbc81a433681390a70be1d55fc4bdae1d1" have entirely different histories.
89fa756305
...
072bcfdbc8
11
README.md
11
README.md
|
|
@ -210,8 +210,6 @@ duplicate those PV manifests when you want storage on another node.
|
||||||
|
|
||||||
`bootstrap/cluster` labels nodes with homelab placement metadata:
|
`bootstrap/cluster` labels nodes with homelab placement metadata:
|
||||||
|
|
||||||
- `node-role.kubernetes.io/worker=worker` on every worker so `kubectl get nodes`
|
|
||||||
shows `worker` instead of `<none>` in the ROLES column
|
|
||||||
- `homelab.dev/node-role=control-plane` and `homelab.dev/storage=local` on the
|
- `homelab.dev/node-role=control-plane` and `homelab.dev/storage=local` on the
|
||||||
Debian control plane
|
Debian control plane
|
||||||
- `homelab.dev/node-role=edge-app` and `homelab.dev/storage=local` on the
|
- `homelab.dev/node-role=edge-app` and `homelab.dev/storage=local` on the
|
||||||
|
|
@ -227,15 +225,6 @@ OpenEBS hostpath PVs are node-local. Move workloads only after their storage and
|
||||||
edge path are ready on the target node. Gitea is outside Kubernetes and is moved
|
edge path are ready on the target node. Gitea is outside Kubernetes and is moved
|
||||||
by changing the Raspberry Pi Docker install target instead.
|
by changing the Raspberry Pi Docker install target instead.
|
||||||
|
|
||||||
The Prometheus stack control workloads are pinned to Pimox worker nodes by the
|
|
||||||
default `prometheus_stack_node_selector` (`homelab.dev/node-role=app` and
|
|
||||||
`homelab.dev/storage=nvme`). Because the Prometheus, Alertmanager, and Grafana
|
|
||||||
PVCs use retained local OpenEBS volumes, moving an existing install off the
|
|
||||||
Debian control plane requires discarding those PVCs. Run
|
|
||||||
`./lab.sh move-prometheus-stack-workers` from the Debian host to label existing
|
|
||||||
worker nodes, destroy only the existing `prometheus-stack` Helm release, delete
|
|
||||||
its retained PVC/PV objects, and recreate the stack on the worker selector.
|
|
||||||
|
|
||||||
The website and demos NodePorts are reachable from the OCI jump box through the
|
The website and demos NodePorts are reachable from the OCI jump box through the
|
||||||
Raspberry Pi Tailscale interface. `bootstrap/cluster` installs a persistent
|
Raspberry Pi Tailscale interface. `bootstrap/cluster` installs a persistent
|
||||||
`homelab-tailscale-nodeport.service` on the configured worker to restore the
|
`homelab-tailscale-nodeport.service` on the configured worker to restore the
|
||||||
|
|
|
||||||
|
|
@ -104,8 +104,6 @@ EOT
|
||||||
local.metallb_ip_address_pool_manifest,
|
local.metallb_ip_address_pool_manifest,
|
||||||
var.metallb.l2_advertisement_enabled ? local.metallb_l2_advertisement_manifest : "",
|
var.metallb.l2_advertisement_enabled ? local.metallb_l2_advertisement_manifest : "",
|
||||||
]))
|
]))
|
||||||
|
|
||||||
prometheus_stack_node_selector = var.prometheus_stack_node_selector
|
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "helm_release" "calico_crds" {
|
resource "helm_release" "calico_crds" {
|
||||||
|
|
@ -1104,7 +1102,6 @@ resource "helm_release" "prometheus_stack" {
|
||||||
enabled = false
|
enabled = false
|
||||||
}
|
}
|
||||||
prometheusOperator = {
|
prometheusOperator = {
|
||||||
nodeSelector = local.prometheus_stack_node_selector
|
|
||||||
tls = {
|
tls = {
|
||||||
enabled = false
|
enabled = false
|
||||||
}
|
}
|
||||||
|
|
@ -1123,7 +1120,6 @@ resource "helm_release" "prometheus_stack" {
|
||||||
}
|
}
|
||||||
alertmanager = {
|
alertmanager = {
|
||||||
alertmanagerSpec = {
|
alertmanagerSpec = {
|
||||||
nodeSelector = local.prometheus_stack_node_selector
|
|
||||||
storage = {
|
storage = {
|
||||||
volumeClaimTemplate = {
|
volumeClaimTemplate = {
|
||||||
spec = {
|
spec = {
|
||||||
|
|
@ -1141,7 +1137,6 @@ resource "helm_release" "prometheus_stack" {
|
||||||
}
|
}
|
||||||
prometheus = {
|
prometheus = {
|
||||||
prometheusSpec = {
|
prometheusSpec = {
|
||||||
nodeSelector = local.prometheus_stack_node_selector
|
|
||||||
retention = var.observability.prometheus.retention
|
retention = var.observability.prometheus.retention
|
||||||
resources = {
|
resources = {
|
||||||
requests = {
|
requests = {
|
||||||
|
|
@ -1173,7 +1168,6 @@ resource "helm_release" "prometheus_stack" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
grafana = {
|
grafana = {
|
||||||
nodeSelector = local.prometheus_stack_node_selector
|
|
||||||
persistence = {
|
persistence = {
|
||||||
enabled = true
|
enabled = true
|
||||||
type = "sts"
|
type = "sts"
|
||||||
|
|
@ -1211,9 +1205,6 @@ resource "helm_release" "prometheus_stack" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"kube-state-metrics" = {
|
|
||||||
nodeSelector = local.prometheus_stack_node_selector
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -210,15 +210,6 @@ variable "observability" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "prometheus_stack_node_selector" {
|
|
||||||
description = "Node selector applied to kube-prometheus-stack control workloads so they stay off the control plane."
|
|
||||||
type = map(string)
|
|
||||||
default = {
|
|
||||||
"homelab.dev/node-role" = "app"
|
|
||||||
"homelab.dev/storage" = "nvme"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "extra_helm_releases" {
|
variable "extra_helm_releases" {
|
||||||
type = map(object({
|
type = map(object({
|
||||||
repository = string
|
repository = string
|
||||||
|
|
|
||||||
96
lab.sh
96
lab.sh
|
|
@ -174,74 +174,6 @@ adopt_apps_existing_resources() {
|
||||||
"demos-static"
|
"demos-static"
|
||||||
}
|
}
|
||||||
|
|
||||||
ensure_homelab_node_labels() {
|
|
||||||
local control_plane_node="${LAB_CONTROL_PLANE_NODE_NAME:-debian}"
|
|
||||||
local raspberry_node="${LAB_RASPBERRY_NODE_NAME:-raspberry}"
|
|
||||||
local prometheus_selector="homelab.dev/node-role=app,homelab.dev/storage=nvme"
|
|
||||||
local node
|
|
||||||
local target_nodes
|
|
||||||
|
|
||||||
echo "Applying homelab labels to existing Kubernetes nodes..."
|
|
||||||
while IFS= read -r node; do
|
|
||||||
[[ -n "${node}" ]] || continue
|
|
||||||
|
|
||||||
if [[ "${node}" == "${control_plane_node}" ]]; then
|
|
||||||
kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \
|
|
||||||
homelab.dev/node-role=control-plane \
|
|
||||||
homelab.dev/storage=local \
|
|
||||||
--overwrite
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
|
|
||||||
kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \
|
|
||||||
node-role.kubernetes.io/worker=worker \
|
|
||||||
--overwrite
|
|
||||||
|
|
||||||
if [[ "${node}" == pimox-worker-* ]]; then
|
|
||||||
kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \
|
|
||||||
homelab.dev/node-role=app \
|
|
||||||
homelab.dev/storage=nvme \
|
|
||||||
--overwrite
|
|
||||||
elif [[ "${node}" == "${raspberry_node}" ]]; then
|
|
||||||
kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \
|
|
||||||
homelab.dev/node-role=edge-app \
|
|
||||||
homelab.dev/storage=local \
|
|
||||||
--overwrite
|
|
||||||
fi
|
|
||||||
done < <(kubectl --kubeconfig "${KUBECONFIG_PATH}" get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')
|
|
||||||
|
|
||||||
target_nodes="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get nodes -l "${prometheus_selector}" -o name)"
|
|
||||||
if [[ -z "${target_nodes}" ]]; then
|
|
||||||
echo "No nodes match ${prometheus_selector}; refusing to move prometheus-stack." >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
delete_prometheus_stack_storage() {
|
|
||||||
local namespace="${1:-monitoring}"
|
|
||||||
local pattern='(^|-)prometheus-stack-(prometheus|alertmanager|grafana)(-|$)|^prometheus-prometheus-stack|^alertmanager-prometheus-stack|^storage-prometheus-stack-grafana'
|
|
||||||
local pvc_names
|
|
||||||
local pv_names
|
|
||||||
|
|
||||||
pvc_names="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get pvc -o name 2>/dev/null |
|
|
||||||
awk -F/ -v pattern="${pattern}" '$2 ~ pattern {print $2}')"
|
|
||||||
pv_names="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get pv \
|
|
||||||
-o jsonpath='{range .items[?(@.spec.claimRef.namespace=="'"${namespace}"'")]}{.metadata.name}{"\t"}{.spec.claimRef.name}{"\n"}{end}' 2>/dev/null |
|
|
||||||
awk -v pattern="${pattern}" '$2 ~ pattern {print $1}')"
|
|
||||||
|
|
||||||
if [[ -n "${pvc_names}" ]]; then
|
|
||||||
echo "Deleting old prometheus-stack PVCs in ${namespace}; saved Prometheus, Alertmanager, and Grafana data will be discarded..."
|
|
||||||
printf '%s\n' "${pvc_names}" |
|
|
||||||
xargs -r kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" delete pvc --wait=true --timeout=180s
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -n "${pv_names}" ]]; then
|
|
||||||
echo "Deleting old prometheus-stack retained PV objects..."
|
|
||||||
printf '%s\n' "${pv_names}" |
|
|
||||||
xargs -r kubectl --kubeconfig "${KUBECONFIG_PATH}" delete pv --wait=false
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
run_tofu_stack() {
|
run_tofu_stack() {
|
||||||
local stack="$1"
|
local stack="$1"
|
||||||
local -a apply_args=(-auto-approve)
|
local -a apply_args=(-auto-approve)
|
||||||
|
|
@ -260,25 +192,6 @@ run_tofu_stack() {
|
||||||
tofu -chdir="${REPO_ROOT}/${stack}" apply "${apply_args[@]}"
|
tofu -chdir="${REPO_ROOT}/${stack}" apply "${apply_args[@]}"
|
||||||
}
|
}
|
||||||
|
|
||||||
move_prometheus_stack_workers() {
|
|
||||||
local stack="bootstrap/platform"
|
|
||||||
local namespace="${LAB_MONITORING_NAMESPACE:-monitoring}"
|
|
||||||
|
|
||||||
require_debian_server "move-prometheus-stack-workers"
|
|
||||||
|
|
||||||
export TF_VAR_kubeconfig_path="${TF_VAR_kubeconfig_path:-${KUBECONFIG_PATH}}"
|
|
||||||
export KUBECONFIG="${TF_VAR_kubeconfig_path}"
|
|
||||||
|
|
||||||
echo "Moving prometheus-stack off the control plane. Existing prometheus-stack PVC data will be deleted."
|
|
||||||
ensure_homelab_node_labels
|
|
||||||
tofu -chdir="${REPO_ROOT}/${stack}" init
|
|
||||||
adopt_platform_existing_resources
|
|
||||||
tofu -chdir="${REPO_ROOT}/${stack}" destroy -target=helm_release.prometheus_stack -auto-approve
|
|
||||||
delete_prometheus_stack_storage "${namespace}"
|
|
||||||
tofu -chdir="${REPO_ROOT}/${stack}" apply -auto-approve
|
|
||||||
kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get pods -o wide
|
|
||||||
}
|
|
||||||
|
|
||||||
truthy() {
|
truthy() {
|
||||||
case "${1,,}" in
|
case "${1,,}" in
|
||||||
1 | true | yes | on)
|
1 | true | yes | on)
|
||||||
|
|
@ -691,8 +604,8 @@ write_cluster_worker_var_file() {
|
||||||
LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \
|
LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \
|
||||||
LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \
|
LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \
|
||||||
LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \
|
LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \
|
||||||
LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\"}}" \
|
LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\"}}" \
|
||||||
LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\"}}" \
|
LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\"}}" \
|
||||||
python3 - "${spec_file}" "${var_file}" <<'PY'
|
python3 - "${spec_file}" "${var_file}" <<'PY'
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
@ -2848,9 +2761,6 @@ case "${1:-}" in
|
||||||
install-gitea-runner)
|
install-gitea-runner)
|
||||||
install_gitea_runner "${2:-}"
|
install_gitea_runner "${2:-}"
|
||||||
;;
|
;;
|
||||||
move-prometheus-stack-workers)
|
|
||||||
move_prometheus_stack_workers
|
|
||||||
;;
|
|
||||||
openwrt)
|
openwrt)
|
||||||
openwrt
|
openwrt
|
||||||
;;
|
;;
|
||||||
|
|
@ -2858,7 +2768,7 @@ case "${1:-}" in
|
||||||
nuke
|
nuke
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "Usage: $0 {up|rebuild-cluster|apps|deploy-gitea|bootstrap-gitea-repo|backup-gitea|drill-gitea-restore|install-gitea-runner|move-prometheus-stack-workers|openwrt|nuke}"
|
echo "Usage: $0 {up|rebuild-cluster|apps|deploy-gitea|bootstrap-gitea-repo|backup-gitea|drill-gitea-restore|install-gitea-runner|openwrt|nuke}"
|
||||||
exit 1
|
exit 1
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue