Fix stale Calico Helm release

This commit is contained in:
juvdiaz 2026-05-23 22:34:48 -06:00
parent 4881c3cea1
commit 66d7dd39a7
1 changed files with 72 additions and 2 deletions

View File

@ -35,15 +35,46 @@ resource "helm_release" "calico_crds" {
create_namespace = true
}
# Pre-install cleanup step for the "calico" release: before helm_release.calico
# runs, delete secrets in the Calico namespace that are labelled owner=helm and
# name=calico and whose "status" label marks an unfinished or failed operation.
# NOTE(review): these are presumably Helm's release-record secrets; removing a
# stuck record is what lets the next install/upgrade proceed - confirm.
resource "null_resource" "calico_helm_recovery" {
depends_on = [helm_release.calico_crds]
triggers = {
# timestamp() yields a new value on every run, so the triggers change and the
# provisioner below is re-executed on each apply.
always = timestamp()
# The remaining values are captured here so the script can read them through
# self.triggers.
kubeconfig_path = var.kubeconfig_path
namespace = var.calico.namespace
release_name = "calico"
}
provisioner "local-exec" {
# -l starts bash as a login shell; -c runs the heredoc below as the script.
interpreter = ["/bin/bash", "-lc"]
command = <<EOT
set -euo pipefail
# List matching secrets as "<name><TAB><status label>" lines. kubectl's stderr
# is discarded, but with pipefail a non-zero kubectl exit still fails the step.
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" get secrets \
-l "owner=helm,name=${self.triggers.release_name}" \
-o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.labels.status}{"\n"}{end}' 2>/dev/null |
while IFS=$'\t' read -r secret status; do
# Only the listed states are deleted; secrets with any other status label
# (or none) are left untouched.
case "$status" in
pending-install|pending-upgrade|pending-rollback|failed|uninstalling)
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" delete secret "$secret"
;;
esac
done
EOT
}
}
resource "helm_release" "calico" {
depends_on = [helm_release.calico_crds]
depends_on = [null_resource.calico_helm_recovery]
name = "calico"
repository = var.calico.repository
chart = "tigera-operator"
version = var.calico.version
namespace = var.calico.namespace
create_namespace = true
timeout = 600
timeout = 900
wait = false
cleanup_on_fail = true
values = [
yamlencode({
@ -90,6 +121,45 @@ resource "null_resource" "calico_ready" {
command = <<EOT
set -euo pipefail
# Best-effort diagnostics dump for a Calico rollout that did not come up.
# Prints, in order: the node list, all pods, the last 80 events of the
# tigera-operator, calico-system and kube-system namespaces, and the
# describe/log output of the operator deployment, the calico-node daemonset
# and the calico-kube-controllers deployment.
# Every command ends in "|| true" so that a failing query cannot abort the
# script under "set -e" while diagnostics are being collected.
# Globals:   none written
# Arguments: none
# Outputs:   kubectl output on stdout
dump_calico_debug() {
  # Keep the loop variable out of the script's global scope.
  local ns

  kubectl --kubeconfig "${self.triggers.kubeconfig_path}" get nodes -o wide || true
  kubectl --kubeconfig "${self.triggers.kubeconfig_path}" get pods -A -o wide || true

  for ns in tigera-operator calico-system kube-system; do
    # stderr is dropped so a namespace that cannot be queried adds no noise.
    kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "$ns" get events --sort-by=.lastTimestamp 2>/dev/null | tail -80 || true
  done

  kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator describe deployment tigera-operator || true
  kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator logs deployment/tigera-operator --tail=160 || true
  kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system describe daemonset calico-node || true
  kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system describe deployment calico-kube-controllers || true
}
# Blocks until the object KIND/NAME exists in NAMESPACE, polling with
# "kubectl get" at a fixed interval. It only checks that the object exists,
# not that it is ready.
# Arguments:
#   $1 - resource kind (for example deployment or daemonset)
#   $2 - namespace to look in
#   $3 - resource name
#   $4 - timeout in seconds
# Outputs:   on timeout, an error line on stderr followed by the debug dump
# Returns:   0 once the object exists; on timeout the whole script exits 1
wait_for_resource() {
  # All working variables are local so that generic names such as "namespace"
  # and "name" cannot overwrite state elsewhere in the script.
  local kind="$1"
  local namespace="$2"
  local name="$3"
  local timeout_seconds="$4"
  local poll_seconds=5
  local elapsed=0

  until kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "$namespace" get "$kind/$name" >/dev/null 2>&1; do
    if [ "$elapsed" -ge "$timeout_seconds" ]; then
      echo "Timed out waiting for $kind/$name in namespace $namespace" >&2
      dump_calico_debug
      # exit (not return) so the caller never continues past a missing object.
      exit 1
    fi
    sleep "$poll_seconds"
    elapsed=$((elapsed + poll_seconds))
  done
}
# From here on, any top-level command that exits non-zero triggers the debug dump.
# NOTE(review): "set -E" is not enabled in the visible script, so the ERR trap
# is not inherited by functions; wait_for_resource calls the dump itself on timeout.
trap dump_calico_debug ERR
# Stage 1: the operator deployment must exist, then finish rolling out.
wait_for_resource deployment tigera-operator tigera-operator 300
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator rollout status deployment/tigera-operator --timeout=300s
# Stage 2: wait for the calico-system workloads to be created before calling
# "rollout status" on them.
wait_for_resource daemonset calico-system calico-node 600
wait_for_resource deployment calico-system calico-kube-controllers 600
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status daemonset/calico-node --timeout=600s
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status deployment/calico-kube-controllers --timeout=600s
# Stage 3: finally require every node to report the Ready condition.
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" wait --for=condition=Ready nodes --all --timeout=600s