Compare commits
No commits in common. "89fa7563058fe3e05d298ff0275c5ec7d5c82b7b" and "072bcfdbc81a433681390a70be1d55fc4bdae1d1" have entirely different histories.
89fa756305
...
072bcfdbc8
11
README.md
11
README.md
|
|
@ -210,8 +210,6 @@ duplicate those PV manifests when you want storage on another node.
|
|||
|
||||
`bootstrap/cluster` labels nodes with homelab placement metadata:
|
||||
|
||||
- `node-role.kubernetes.io/worker=worker` on every worker so `kubectl get nodes`
|
||||
shows `worker` instead of `<none>` in the ROLES column
|
||||
- `homelab.dev/node-role=control-plane` and `homelab.dev/storage=local` on the
|
||||
Debian control plane
|
||||
- `homelab.dev/node-role=edge-app` and `homelab.dev/storage=local` on the
|
||||
|
|
@ -227,15 +225,6 @@ OpenEBS hostpath PVs are node-local. Move workloads only after their storage and
|
|||
edge path are ready on the target node. Gitea is outside Kubernetes and is moved
|
||||
by changing the Raspberry Pi Docker install target instead.
|
||||
|
||||
The Prometheus stack control workloads are pinned to Pimox worker nodes by the
|
||||
default `prometheus_stack_node_selector` (`homelab.dev/node-role=app` and
|
||||
`homelab.dev/storage=nvme`). Because the Prometheus, Alertmanager, and Grafana
|
||||
PVCs use retained local OpenEBS volumes, moving an existing install off the
|
||||
Debian control plane requires discarding those PVCs. Run
|
||||
`./lab.sh move-prometheus-stack-workers` from the Debian host to label existing
|
||||
worker nodes, destroy only the existing `prometheus-stack` Helm release, delete
|
||||
its retained PVC/PV objects, and recreate the stack on the worker selector.
|
||||
|
||||
The website and demos NodePorts are reachable from the OCI jump box through the
|
||||
Raspberry Pi Tailscale interface. `bootstrap/cluster` installs a persistent
|
||||
`homelab-tailscale-nodeport.service` on the configured worker to restore the
|
||||
|
|
|
|||
|
|
@ -104,8 +104,6 @@ EOT
|
|||
local.metallb_ip_address_pool_manifest,
|
||||
var.metallb.l2_advertisement_enabled ? local.metallb_l2_advertisement_manifest : "",
|
||||
]))
|
||||
|
||||
prometheus_stack_node_selector = var.prometheus_stack_node_selector
|
||||
}
|
||||
|
||||
resource "helm_release" "calico_crds" {
|
||||
|
|
@ -1104,7 +1102,6 @@ resource "helm_release" "prometheus_stack" {
|
|||
enabled = false
|
||||
}
|
||||
prometheusOperator = {
|
||||
nodeSelector = local.prometheus_stack_node_selector
|
||||
tls = {
|
||||
enabled = false
|
||||
}
|
||||
|
|
@ -1123,7 +1120,6 @@ resource "helm_release" "prometheus_stack" {
|
|||
}
|
||||
alertmanager = {
|
||||
alertmanagerSpec = {
|
||||
nodeSelector = local.prometheus_stack_node_selector
|
||||
storage = {
|
||||
volumeClaimTemplate = {
|
||||
spec = {
|
||||
|
|
@ -1141,7 +1137,6 @@ resource "helm_release" "prometheus_stack" {
|
|||
}
|
||||
prometheus = {
|
||||
prometheusSpec = {
|
||||
nodeSelector = local.prometheus_stack_node_selector
|
||||
retention = var.observability.prometheus.retention
|
||||
resources = {
|
||||
requests = {
|
||||
|
|
@ -1173,7 +1168,6 @@ resource "helm_release" "prometheus_stack" {
|
|||
}
|
||||
}
|
||||
grafana = {
|
||||
nodeSelector = local.prometheus_stack_node_selector
|
||||
persistence = {
|
||||
enabled = true
|
||||
type = "sts"
|
||||
|
|
@ -1211,9 +1205,6 @@ resource "helm_release" "prometheus_stack" {
|
|||
}
|
||||
}
|
||||
}
|
||||
"kube-state-metrics" = {
|
||||
nodeSelector = local.prometheus_stack_node_selector
|
||||
}
|
||||
})
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -210,15 +210,6 @@ variable "observability" {
|
|||
}
|
||||
}
|
||||
|
||||
# Node selector shared by the kube-prometheus-stack control workloads.
# The default targets nodes labelled homelab.dev/node-role=app and
# homelab.dev/storage=nvme.
variable "prometheus_stack_node_selector" {
  type        = map(string)
  description = "Node selector applied to kube-prometheus-stack control workloads so they stay off the control plane."

  default = {
    "homelab.dev/node-role" = "app"
    "homelab.dev/storage"   = "nvme"
  }
}
|
||||
|
||||
variable "extra_helm_releases" {
|
||||
type = map(object({
|
||||
repository = string
|
||||
|
|
|
|||
96
lab.sh
96
lab.sh
|
|
@ -174,74 +174,6 @@ adopt_apps_existing_resources() {
|
|||
"demos-static"
|
||||
}
|
||||
|
||||
#######################################
# Label every existing Kubernetes node with its homelab placement metadata and
# verify that at least one node satisfies the prometheus-stack node selector.
#
# Label rules:
#   - control-plane node: homelab.dev/node-role=control-plane,
#                         homelab.dev/storage=local
#   - every other node:   node-role.kubernetes.io/worker=worker
#   - pimox-worker-*:     additionally homelab.dev/node-role=app,
#                         homelab.dev/storage=nvme
#   - Raspberry Pi node:  additionally homelab.dev/node-role=edge-app,
#                         homelab.dev/storage=local
#
# Globals:
#   KUBECONFIG_PATH (read)             - kubeconfig passed to every kubectl call
#   LAB_CONTROL_PLANE_NODE_NAME (read) - control-plane node name (default: debian)
#   LAB_RASPBERRY_NODE_NAME (read)     - Raspberry Pi node name (default: raspberry)
# Arguments: none
# Outputs:   progress message on stdout, failure reasons on stderr
# Exits:     status 1 when the node list cannot be read or when no node
#            matches the prometheus-stack selector
#######################################
ensure_homelab_node_labels() {
  local control_plane_node="${LAB_CONTROL_PLANE_NODE_NAME:-debian}"
  local raspberry_node="${LAB_RASPBERRY_NODE_NAME:-raspberry}"
  local prometheus_selector="homelab.dev/node-role=app,homelab.dev/storage=nvme"
  local node
  local node_names
  local target_nodes
  local -a labels

  echo "Applying homelab labels to existing Kubernetes nodes..."

  # Fetch the node list before looping. When kubectl runs inside `< <(...)`
  # its exit status is discarded (even under `set -e`), so an unreachable API
  # server would look like a cluster with zero nodes.
  if ! node_names="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get nodes \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"; then
    echo "Unable to list Kubernetes nodes; refusing to move prometheus-stack." >&2
    exit 1
  fi

  while IFS= read -r node; do
    [[ -n "${node}" ]] || continue

    if [[ "${node}" == "${control_plane_node}" ]]; then
      labels=(
        homelab.dev/node-role=control-plane
        homelab.dev/storage=local
      )
    else
      # Every non-control-plane node gets the worker role so `kubectl get nodes`
      # shows "worker" in the ROLES column.
      labels=(node-role.kubernetes.io/worker=worker)

      if [[ "${node}" == pimox-worker-* ]]; then
        labels+=(homelab.dev/node-role=app homelab.dev/storage=nvme)
      elif [[ "${node}" == "${raspberry_node}" ]]; then
        labels+=(homelab.dev/node-role=edge-app homelab.dev/storage=local)
      fi
    fi

    # One API round trip per node: all labels are applied together.
    kubectl --kubeconfig "${KUBECONFIG_PATH}" label node "${node}" \
      "${labels[@]}" \
      --overwrite
  done <<<"${node_names}"

  # The caller is about to destroy and recreate prometheus-stack; bail out now
  # if the recreated pods would have nowhere to schedule.
  target_nodes="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get nodes -l "${prometheus_selector}" -o name)"
  if [[ -z "${target_nodes}" ]]; then
    echo "No nodes match ${prometheus_selector}; refusing to move prometheus-stack." >&2
    exit 1
  fi
}
|
||||
|
||||
#######################################
# Delete the PVCs and retained PV objects that belong to the prometheus-stack
# release (Prometheus, Alertmanager, Grafana). The data on them is discarded.
#
# PV names are collected BEFORE any PVC is deleted, matched through each PV's
# claimRef (namespace + claim name).
#
# Globals:   KUBECONFIG_PATH (read) - kubeconfig passed to every kubectl call
# Arguments: $1 - namespace holding the PVCs (default: monitoring)
# Outputs:   progress messages on stdout, failure reasons on stderr
# Returns:   1 when PVCs or PVs cannot be listed; otherwise the status of the
#            last kubectl delete that ran (0 when nothing needed deleting)
#######################################
delete_prometheus_stack_storage() {
  local namespace="${1:-monitoring}"
  local pattern='(^|-)prometheus-stack-(prometheus|alertmanager|grafana)(-|$)|^prometheus-prometheus-stack|^alertmanager-prometheus-stack|^storage-prometheus-stack-grafana'
  local pvc_listing
  local pv_listing
  local pvc_names
  local pv_names
  local -a pvc_list
  local -a pv_list

  # stderr stays silenced so an empty result produces no noise, but the exit
  # status is checked: a listing failure must not be mistaken for "nothing to
  # delete", otherwise the stack would be recreated against the old volumes.
  if ! pvc_listing="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get pvc -o name 2>/dev/null)"; then
    echo "Unable to list PVCs in ${namespace}; not deleting prometheus-stack storage." >&2
    return 1
  fi

  if ! pv_listing="$(kubectl --kubeconfig "${KUBECONFIG_PATH}" get pv \
    -o jsonpath='{range .items[?(@.spec.claimRef.namespace=="'"${namespace}"'")]}{.metadata.name}{"\t"}{.spec.claimRef.name}{"\n"}{end}' 2>/dev/null)"; then
    echo "Unable to list PVs bound to ${namespace}; not deleting prometheus-stack storage." >&2
    return 1
  fi

  # `get pvc -o name` prints "persistentvolumeclaim/<name>": keep the name part.
  pvc_names="$(awk -F/ -v pattern="${pattern}" '$2 ~ pattern {print $2}' <<<"${pvc_listing}")"
  # PV listing rows are "<pv-name>\t<claim-name>": filter on the claim name.
  pv_names="$(awk -v pattern="${pattern}" '$2 ~ pattern {print $1}' <<<"${pv_listing}")"

  if [[ -n "${pvc_names}" ]]; then
    echo "Deleting old prometheus-stack PVCs in ${namespace}; saved Prometheus, Alertmanager, and Grafana data will be discarded..."
    mapfile -t pvc_list <<<"${pvc_names}"
    kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" delete pvc --wait=true --timeout=180s "${pvc_list[@]}"
  fi

  if [[ -n "${pv_names}" ]]; then
    echo "Deleting old prometheus-stack retained PV objects..."
    mapfile -t pv_list <<<"${pv_names}"
    kubectl --kubeconfig "${KUBECONFIG_PATH}" delete pv --wait=false "${pv_list[@]}"
  fi
}
|
||||
|
||||
run_tofu_stack() {
|
||||
local stack="$1"
|
||||
local -a apply_args=(-auto-approve)
|
||||
|
|
@ -260,25 +192,6 @@ run_tofu_stack() {
|
|||
tofu -chdir="${REPO_ROOT}/${stack}" apply "${apply_args[@]}"
|
||||
}
|
||||
|
||||
#######################################
# Relocate the prometheus-stack Helm release onto the worker node selector.
#
# Sequence: label nodes, init the platform stack, adopt existing resources,
# destroy only helm_release.prometheus_stack, delete its PVC/PV objects, apply
# the stack again, then print the pods in the monitoring namespace.
#
# Globals:
#   KUBECONFIG_PATH (read)           - kubeconfig for kubectl and tofu
#   LAB_MONITORING_NAMESPACE (read)  - namespace to clean (default: monitoring)
#   REPO_ROOT (read)                 - repository root containing the stack
#   TF_VAR_kubeconfig_path (exported, defaults to KUBECONFIG_PATH)
#   KUBECONFIG (exported)
# Arguments: none
# Outputs:   progress on stdout; final `kubectl get pods -o wide` listing
#######################################
move_prometheus_stack_workers() {
  local namespace="${LAB_MONITORING_NAMESPACE:-monitoring}"
  local stack="bootstrap/platform"
  local -a tofu_in_stack

  require_debian_server "move-prometheus-stack-workers"

  # Keep a caller-provided TF_VAR_kubeconfig_path; otherwise fall back to the
  # script-wide kubeconfig. KUBECONFIG mirrors whichever value wins.
  : "${TF_VAR_kubeconfig_path:=${KUBECONFIG_PATH}}"
  export TF_VAR_kubeconfig_path
  KUBECONFIG="${TF_VAR_kubeconfig_path}"
  export KUBECONFIG

  printf '%s\n' "Moving prometheus-stack off the control plane. Existing prometheus-stack PVC data will be deleted."
  ensure_homelab_node_labels

  # Every tofu call below targets the same stack directory.
  tofu_in_stack=(tofu -chdir="${REPO_ROOT}/${stack}")

  "${tofu_in_stack[@]}" init
  adopt_platform_existing_resources
  "${tofu_in_stack[@]}" destroy -target=helm_release.prometheus_stack -auto-approve
  delete_prometheus_stack_storage "${namespace}"
  "${tofu_in_stack[@]}" apply -auto-approve

  kubectl --kubeconfig "${KUBECONFIG_PATH}" -n "${namespace}" get pods -o wide
}
|
||||
|
||||
truthy() {
|
||||
case "${1,,}" in
|
||||
1 | true | yes | on)
|
||||
|
|
@ -691,8 +604,8 @@ write_cluster_worker_var_file() {
|
|||
LAB_RASPBERRY_USER="${LAB_RASPBERRY_USER:-jv}" \
|
||||
LAB_RASPBERRY_NODE_NAME="${LAB_RASPBERRY_NODE_NAME:-raspberry}" \
|
||||
LAB_RASPBERRY_SSH_KEY_PATH="${LAB_RASPBERRY_SSH_KEY_PATH:-/home/jv/.ssh/id_ed25519}" \
|
||||
LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\"}}" \
|
||||
LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"node-role.kubernetes.io/worker\":\"worker\",\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\"}}" \
|
||||
LAB_RASPBERRY_NODE_LABELS_JSON="${LAB_RASPBERRY_NODE_LABELS_JSON:-{\"homelab.dev/node-role\":\"edge-app\",\"homelab.dev/storage\":\"local\"}}" \
|
||||
LAB_PIMOX_WORKER_NODE_LABELS_JSON="${LAB_PIMOX_WORKER_NODE_LABELS_JSON:-{\"homelab.dev/node-role\":\"app\",\"homelab.dev/storage\":\"nvme\"}}" \
|
||||
python3 - "${spec_file}" "${var_file}" <<'PY'
|
||||
import json
|
||||
import os
|
||||
|
|
@ -2848,9 +2761,6 @@ case "${1:-}" in
|
|||
install-gitea-runner)
|
||||
install_gitea_runner "${2:-}"
|
||||
;;
|
||||
move-prometheus-stack-workers)
|
||||
move_prometheus_stack_workers
|
||||
;;
|
||||
openwrt)
|
||||
openwrt
|
||||
;;
|
||||
|
|
@ -2858,7 +2768,7 @@ case "${1:-}" in
|
|||
nuke
|
||||
;;
|
||||
*)
|
||||
echo "Usage: $0 {up|rebuild-cluster|apps|deploy-gitea|bootstrap-gitea-repo|backup-gitea|drill-gitea-restore|install-gitea-runner|move-prometheus-stack-workers|openwrt|nuke}"
|
||||
echo "Usage: $0 {up|rebuild-cluster|apps|deploy-gitea|bootstrap-gitea-repo|backup-gitea|drill-gitea-restore|install-gitea-runner|openwrt|nuke}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
|
|
|||
Loading…
Reference in New Issue