Fix stale Calico Helm release state
This commit is contained in:
parent
4881c3cea1
commit
66d7dd39a7
|
|
@ -35,15 +35,46 @@ resource "helm_release" "calico_crds" {
|
|||
create_namespace = true
|
||||
}
|
||||
|
||||
resource "helm_release" "calico" {
|
||||
# Deletes Helm release-record secrets of the "calico" release whose status
# label is a pending, failed or uninstalling state. Runs only after the
# calico_crds release has been applied.
resource "null_resource" "calico_helm_recovery" {
  depends_on = [helm_release.calico_crds]

  triggers = {
    # timestamp() yields a new value on each run, so this resource is
    # replaced (and the provisioner re-executed) on every apply.
    always          = timestamp()
    release_name    = "calico"
    namespace       = var.calico.namespace
    kubeconfig_path = var.kubeconfig_path
  }

  provisioner "local-exec" {
    interpreter = ["/bin/bash", "-lc"]
    command     = <<EOT
set -euo pipefail

# kubectl pinned to the target kubeconfig and the release namespace.
release_kubectl() {
  kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "${self.triggers.namespace}" "$@"
}

# Emit one "<secret name><TAB><status label>" line per Helm release secret,
# then delete every secret whose status matches one of the listed states.
release_kubectl get secrets \
  -l "owner=helm,name=${self.triggers.release_name}" \
  -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.labels.status}{"\n"}{end}' 2>/dev/null |
  while IFS=$'\t' read -r secret_name release_status; do
    case "$release_status" in
      pending-install | pending-upgrade | pending-rollback | failed | uninstalling)
        release_kubectl delete secret "$secret_name"
        ;;
    esac
  done
EOT
  }
}
|
||||
|
||||
resource "helm_release" "calico" {
|
||||
depends_on = [null_resource.calico_helm_recovery]
|
||||
name = "calico"
|
||||
repository = var.calico.repository
|
||||
chart = "tigera-operator"
|
||||
version = var.calico.version
|
||||
namespace = var.calico.namespace
|
||||
create_namespace = true
|
||||
timeout = 600
|
||||
timeout = 900
|
||||
wait = false
|
||||
cleanup_on_fail = true
|
||||
|
||||
values = [
|
||||
yamlencode({
|
||||
|
|
@ -90,6 +121,45 @@ resource "null_resource" "calico_ready" {
|
|||
command = <<EOT
|
||||
set -euo pipefail
|
||||
|
||||
# Print cluster and Calico diagnostics. Every command is best-effort: a
# failing kubectl call never aborts the dump.
dump_calico_debug() {
  local kubeconfig="${self.triggers.kubeconfig_path}"

  # Cluster-wide overview.
  kubectl --kubeconfig "$kubeconfig" get nodes -o wide || true
  kubectl --kubeconfig "$kubeconfig" get pods -A -o wide || true

  # Last 80 lines of events, sorted by lastTimestamp, per namespace.
  for ns in tigera-operator calico-system kube-system
  do
    kubectl --kubeconfig "$kubeconfig" -n "$ns" get events --sort-by=.lastTimestamp 2>/dev/null \
      | tail -80 \
      || true
  done

  # Operator state and its most recent log lines.
  kubectl --kubeconfig "$kubeconfig" -n tigera-operator describe deployment tigera-operator || true
  kubectl --kubeconfig "$kubeconfig" -n tigera-operator logs deployment/tigera-operator --tail=160 || true

  # Calico workloads.
  kubectl --kubeconfig "$kubeconfig" -n calico-system describe daemonset calico-node || true
  kubectl --kubeconfig "$kubeconfig" -n calico-system describe deployment calico-kube-controllers || true
}
|
||||
|
||||
# Poll until the named Kubernetes object exists, or give up after a timeout.
#
# Arguments:
#   $1 - resource kind (for example deployment or daemonset)
#   $2 - namespace to look in
#   $3 - resource name
#   $4 - timeout in seconds
# Outputs:
#   On timeout, writes a message and the last kubectl error (if any) to
#   stderr, then calls dump_calico_debug.
# Returns:
#   0 once "kubectl get" succeeds; exits the script with status 1 on timeout.
wait_for_resource() {
  # Function-scoped, so calls cannot clobber same-named variables elsewhere
  # in the script.
  local kind="$1"
  local namespace="$2"
  local name="$3"
  local timeout_seconds="$4"
  local elapsed=0
  local last_error=""

  # Capture stderr of the most recent attempt (stdout is discarded) so a
  # timeout caused by an API or auth failure is distinguishable from a
  # resource that simply has not been created yet.
  until last_error="$(kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n "$namespace" get "$kind/$name" 2>&1 >/dev/null)"; do
    if [ "$elapsed" -ge "$timeout_seconds" ]; then
      echo "Timed out waiting for $kind/$name in namespace $namespace" >&2
      if [ -n "$last_error" ]; then
        echo "Last kubectl error: $last_error" >&2
      fi
      dump_calico_debug
      exit 1
    fi
    sleep 5
    # Only the sleep intervals are counted; time spent inside kubectl is not.
    elapsed=$((elapsed + 5))
  done
}
|
||||
|
||||
trap dump_calico_debug ERR
|
||||
|
||||
wait_for_resource deployment tigera-operator tigera-operator 300
|
||||
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n tigera-operator rollout status deployment/tigera-operator --timeout=300s
|
||||
|
||||
wait_for_resource daemonset calico-system calico-node 600
|
||||
wait_for_resource deployment calico-system calico-kube-controllers 600
|
||||
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status daemonset/calico-node --timeout=600s
|
||||
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" -n calico-system rollout status deployment/calico-kube-controllers --timeout=600s
|
||||
kubectl --kubeconfig "${self.triggers.kubeconfig_path}" wait --for=condition=Ready nodes --all --timeout=600s
|
||||
|
|
|
|||
Loading…
Reference in New Issue