# Patch for bootstrap/platform/main.tf: Calico install recovery and readiness checks.
#
# NOTE(review): this copy of the patch has lost its line breaks, and both
# `command = <<...` heredoc openers (plus the text that followed them) appear to
# be truncated to `command = </dev/null |`. It is unlikely to apply as-is;
# regenerate it from the originating commit before use.
#
# What the visible hunks change:
# - Adds null_resource.calico_helm_recovery, which depends on
#   helm_release.calico_crds. Its local-exec script runs under `/bin/bash -lc`,
#   reads tab-separated "<secret> <status>" pairs from a pipeline whose producer
#   is not visible here, and runs `kubectl delete secret` in the configured
#   namespace for every pair whose status is pending-install, pending-upgrade,
#   pending-rollback, failed or uninstalling.
# - The resource's `always = timestamp()` trigger changes on every evaluation;
#   presumably this is meant to re-run the recovery script on each apply -
#   TODO confirm that is intended.
# - helm_release.calico now depends on null_resource.calico_helm_recovery instead
#   of helm_release.calico_crds, raises timeout from 600 to 900, and sets
#   wait = false and cleanup_on_fail = true.
#   NOTE(review): with wait = false the larger timeout may have little effect;
#   verify against the Helm provider documentation.
# - The null_resource.calico_ready script gains a dump_calico_debug helper (its
#   opening lines are missing from this copy). The visible part runs
#   `kubectl describe` / `kubectl logs` for the tigera-operator deployment, the
#   calico-node daemonset and the calico-kube-controllers deployment, each
#   followed by `|| true` so a failing diagnostic command does not itself fail.
# - Adds wait_for_resource <kind> <namespace> <name> <timeout_seconds>: polls
#   `kubectl get <kind>/<name>` every 5 seconds; once the elapsed time reaches the
#   timeout it prints a message to stderr, calls dump_calico_debug and exits 1.
#   NOTE(review): its variables are assigned without `local`, so they are
#   script-global.
# - Installs `trap dump_calico_debug ERR`, then waits for each resource to exist
#   before the corresponding `kubectl rollout status` call.
#   NOTE(review): whether the script aborts after the trap fires depends on
#   shell options (e.g. `set -e`) that are not visible in this copy.
diff --git a/bootstrap/platform/main.tf b/bootstrap/platform/main.tf index 2b2c979..9870629 100644 --- a/bootstrap/platform/main.tf +++ b/bootstrap/platform/main.tf @@ -35,15 +35,46 @@ resource "helm_release" "calico_crds" { create_namespace = true } +resource "null_resource" "calico_helm_recovery" { + depends_on = [helm_release.calico_crds] + + triggers = { + always = timestamp() + kubeconfig_path = var.kubeconfig_path + namespace = var.calico.namespace + release_name = "calico" + } + + provisioner "local-exec" { + interpreter = ["/bin/bash", "-lc"] + command = </dev/null | +while IFS=$'\t' read -r secret status; do + case "$status" in + pending-install|pending-upgrade|pending-rollback|failed|uninstalling) + kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" delete secret "$secret" + ;; + esac +done +EOT + } +} + resource "helm_release" "calico" { - depends_on = [helm_release.calico_crds] + depends_on = [null_resource.calico_helm_recovery] name = "calico" repository = var.calico.repository chart = "tigera-operator" version = var.calico.version namespace = var.calico.namespace create_namespace = true - timeout = 600 + timeout = 900 + wait = false + cleanup_on_fail = true values = [ yamlencode({ @@ -90,6 +121,45 @@ resource "null_resource" "calico_ready" { command = </dev/null | tail -80 || true + done + + kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator describe deployment tigera-operator || true + kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator logs deployment/tigera-operator --tail=160 || true + kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system describe daemonset calico-node || true + kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system describe deployment calico-kube-controllers || true +} + +wait_for_resource() { + kind="$1" + namespace="$2" + name="$3" + timeout_seconds="$4" + elapsed=0 + + until kubectl --kubeconfig 
"${self.triggers.kubeconfig_path}" -n "$namespace" get "$kind/$name" >/dev/null 2>&1; do + if [ "$elapsed" -ge "$timeout_seconds" ]; then + echo "Timed out waiting for $kind/$name in namespace $namespace" >&2 + dump_calico_debug + exit 1 + fi + sleep 5 + elapsed=$((elapsed + 5)) + done +} + +trap dump_calico_debug ERR + +wait_for_resource deployment tigera-operator tigera-operator 300 +kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator rollout status deployment/tigera-operator --timeout=300s + +wait_for_resource daemonset calico-system calico-node 600 +wait_for_resource deployment calico-system calico-kube-controllers 600 kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status daemonset/calico-node --timeout=600s kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status deployment/calico-kube-controllers --timeout=600s kubectl --kubeconfig "${self.triggers.kubeconfig_path}" wait --for=condition=Ready nodes --all --timeout=600s