terraform helm release 在等待条件时超时

terraform helm release timeout while waiting for condition

我正在使用 terraform 在 azure 中配置一些资源,但我似乎无法让 helm 来安装 nginx-ingress,因为它在等待条件时超时

  • helm_release.nginx_ingress: 1 error(s) occurred:

  • helm_release.nginx_ingress: rpc error: code = Unknown desc = release nginx-ingress failed: timed out waiting for the condition

Terraform does not automatically rollback in the face of errors. Instead, your Terraform state file has been partially updated with any resources that successfully completed. Please address the error above and apply again to incrementally change your infrastructure. main.tf

# Look up the pre-existing static public IP that the nginx-ingress
# controller's LoadBalancer service should use.
# NOTE(review): resource_group_name has the same value as the IP's name
# ("xxxx-public-ip") — presumably a redacted placeholder; confirm it is the
# resource group that CONTAINS the IP, not the IP's own name.
data "azurerm_public_ip" "nginx_ingress" {
    name                = "xxxx-public-ip"
    resource_group_name = "xxxx-public-ip"
}

# Resource group that holds the AKS cluster resource itself.
# (AKS additionally auto-creates a separate MC_* "node" resource group for
# the cluster's load balancers, NICs, etc. — relevant to the IP issue below.)
resource "azurerm_resource_group" "xxxx_RG" {
  name     = "${var.name_prefix}"
  location = "${var.location}"
}

# AKS cluster, written in Terraform 0.11-era syntax (quoted interpolations,
# agent_pool_profile block — later provider versions use default_node_pool).
resource "azurerm_kubernetes_cluster" "k8s" {
    name                    = "${var.name_prefix}-aks"
    kubernetes_version      = "${var.kubernetes_version}"
    location                = "${azurerm_resource_group.xxxx_RG.location}"
    resource_group_name     = "${azurerm_resource_group.xxxx_RG.name}"
    dns_prefix              = "AKS-${var.dns_prefix}"

    # Worker node pool; 30 GB OS disk per node.
    agent_pool_profile {
        name                = "${var.node_pool_name}"
        count               = "${var.node_pool_size}"
        vm_size             = "${var.node_pool_vmsize}"
        os_type             = "${var.node_pool_os}"
        os_disk_size_gb     = 30
    }

    # Service principal the cluster uses to manage Azure resources.
    # NOTE(review): this principal needs permissions on any resource group
    # it must touch (e.g. the one holding an external static public IP).
    service_principal {
        client_id           = "${var.client_id}"
        client_secret       = "${var.client_secret}"
    }

    tags = {
        environment = "${var.env_tag}"
    }
}

# Helm (v2) provider pointed at the AKS admin kubeconfig.
# install_tiller = true makes the provider deploy Tiller into the cluster
# before applying any helm_release.
provider "helm" {
  install_tiller = true

  kubernetes {
    host                   = "${azurerm_kubernetes_cluster.k8s.kube_config.0.host}"
    client_certificate     = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_certificate)}"
    client_key             = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_key)}"
    cluster_ca_certificate = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.cluster_ca_certificate)}"
  }
}

# Add Kubernetes Stable Helm charts repo
# Add Kubernetes Stable Helm charts repo.
# NOTE(review): the helm_repository RESOURCE was later deprecated in favor of
# the data source form (used further down in this thread); also this chart
# repo URL was retired — newer setups use https://charts.helm.sh/stable.
resource "helm_repository" "stable" {
  name = "stable"
  url  = "https://kubernetes-charts.storage.googleapis.com"
}

# Install Nginx Ingress using Helm Chart
# Install nginx-ingress and pin its LoadBalancer service to the pre-allocated
# static public IP. wait = "true" blocks the apply until every chart resource
# (including the LoadBalancer service) is ready — if the load balancer never
# receives its IP, the release fails with "timed out waiting for the condition".
resource "helm_release" "nginx_ingress" {
  name       = "nginx-ingress"
  repository = "${helm_repository.stable.metadata.0.name}"
  chart      = "nginx-ingress"
  wait       = "true"

  set {
    name  = "rbac.create"
    value = "false"
  }

  set {
    name  = "controller.service.externalTrafficPolicy"
    value = "Local"
  }

  set {
    name  = "controller.service.loadBalancerIP"
    value = "${data.azurerm_public_ip.nginx_ingress.ip_address}"
  }

  # FIX: the static IP lives outside the cluster's auto-created MC_* node
  # resource group, so the Azure cloud provider must be told which resource
  # group to search via this service annotation; without it the LoadBalancer
  # is never provisioned and the release times out.
  # The cluster's service principal also needs at least "Network Contributor"
  # on that resource group. Dots inside the annotation key are escaped (\\.)
  # so helm treats them as literal characters, not key separators.
  set {
    name  = "controller.service.annotations.\"service\\.beta\\.kubernetes\\.io/azure-load-balancer-resource-group\""
    value = "${data.azurerm_public_ip.nginx_ingress.resource_group_name}"
  }
}

然后用这个部署我的应用程序

# Kubernetes provider for deploying the application, authenticated with the
# same AKS admin kubeconfig (basic auth + client certs).
provider "kubernetes" {
    host                    = "${azurerm_kubernetes_cluster.k8s.kube_config.0.host}"
    username                = "${azurerm_kubernetes_cluster.k8s.kube_config.0.username}"
    password                = "${azurerm_kubernetes_cluster.k8s.kube_config.0.password}"
    client_certificate      = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_certificate)}"
    client_key              = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_key)}"
    cluster_ca_certificate  = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.cluster_ca_certificate)}"
}

# Single-replica Deployment for the Flask API. Pods are labelled
# "component: api" so the ClusterIP service below can select them.
resource "kubernetes_deployment" "flask_api_deployment" {
    metadata {
        name = "flask-api-deployment"
    }

    spec {
        replicas = 1
        # NOTE(review): match_labels as a nested block is 0.11-style syntax;
        # on Terraform 0.12+ the kubernetes provider expects
        # `match_labels = { ... }` (a map) — confirm against the TF version.
        selector {
            match_labels {
                component = "api"
            }
        }

        template {
            metadata {
                labels = {
                    component = "api"
                }
            }

            spec {
                container {
                    image = "xxxx.azurecr.io/sampleflask:0.1.0"
                    name = "flask-api"
                    # Container listens on the Flask default port 5000.
                    port {
                        container_port = 5000
                    }
                }
            }
        }
    }
}

# Ingress with only a default backend: ALL traffic reaching the ingress
# controller is forwarded to the Flask ClusterIP service on port 5000.
# Depends on the nginx-ingress controller installed via helm_release above.
resource "kubernetes_ingress" "flask_api_ingress_service" {
    metadata {
        name = "flask-api-ingress-service"
    }

    spec {
        backend {
            service_name = "flask-api-cluster-ip-service"
            service_port = 5000
        }
    }
}

# ClusterIP service fronting the Flask pods (selected by "component: api").
# NOTE(review): the Terraform name "flask_api_cluster_ip-service" mixes an
# underscore/hyphen style inconsistent with the other resources; renaming it
# would change the state address, so it is only flagged here.
resource "kubernetes_service" "flask_api_cluster_ip-service" {
    metadata {
        name = "flask-api-cluster-ip-service"
    }

    spec {
        selector {
            component = "api"
        }

        port {
            port = 5000
            target_port = 5000
        }
    }
}

我不确定它在等待什么条件。我可以将超时设置得更大,但这似乎无济于事。我也可以在 helm 版本中设置 wait = false 但似乎没有资源被配置。

编辑:根据我所做的一些测试,我发现在 helm release 中指定 loadBalancerIP 时存在问题。如果我把这一行注释掉,它就能成功完成。

编辑:通过更多测试,我发现负载均衡器创建失败,错误信息为:Controller: 在资源组 MC_xxxxxxxx 中找不到用户提供的 IP 地址 52.xxx.x.xx(user supplied IP Address was not found in resource group)。

所以我想问题是如何允许指定来自不同资源组的 IP?

最好在集群中启用 RBAC。如何使用 Terraform 执行此操作并随后安装 Helm 的示例是:

…
resource "azurerm_kubernetes_cluster" "k8s" {
…

  role_based_access_control {
    enabled = "true"
  }

}

# Kubernetes provider (answer's version): certificate-only auth against the
# AKS admin kubeconfig, used to create the Tiller RBAC objects below.
provider "kubernetes" {
  host                   = "${azurerm_kubernetes_cluster.k8s.kube_config.0.host}"
  client_certificate     = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_certificate)}"
  client_key             = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_key)}"
  cluster_ca_certificate = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.cluster_ca_certificate)}"
}

# Service account for Tiller (Helm v2's in-cluster component), created in
# kube-system so Tiller can run there with its own RBAC identity.
resource "kubernetes_service_account" "tiller_sa" {
  metadata {
    name      = "tiller"
    namespace = "kube-system"
  }
}

# Grant the Tiller service account cluster-admin so it can install charts
# cluster-wide on an RBAC-enabled cluster. Referencing the service account's
# metadata also makes Terraform create the SA before this binding.
resource "kubernetes_cluster_role_binding" "tiller_sa_cluster_admin_rb" {
  metadata {
    name = "tiller-cluster-role"
  }
  role_ref {
    kind      = "ClusterRole"
    name      = "cluster-admin"
    api_group = "rbac.authorization.k8s.io"
  }
  subject {
    kind      = "ServiceAccount"
    name      = "${kubernetes_service_account.tiller_sa.metadata.0.name}"
    namespace = "kube-system"
    # api_group must be empty for ServiceAccount subjects (core API group).
    api_group = ""
  }
}

# helm provider
provider "helm" {
  debug           = true
  namespace       = "kube-system"
  service_account = "tiller"
  install_tiller  = "true"
  tiller_image    = "gcr.io/kubernetes-helm/tiller:v${var.TILLER_VER}"
  kubernetes {
    host                   = "${azurerm_kubernetes_cluster.k8s.kube_config.0.host}"
    client_certificate     = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_certificate)}"
    client_key             = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_key)}"
    cluster_ca_certificate = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.cluster_ca_certificate)}"
  }
}

# Stable charts repo, declared as a data source (the non-deprecated form,
# unlike the helm_repository resource used earlier in this thread).
data "helm_repository" "stable" {
  name = "stable"
  url  = "https://kubernetes-charts.storage.googleapis.com"
}

# Install the Datadog agent chart from the stable repo into its own
# "datadog" namespace, passing the API key from a Terraform variable.
resource "helm_release" "datadog" {
  name       = "datadog"
  namespace  = "datadog"
  repository = "${data.helm_repository.stable.metadata.0.name}"
  # FIX: when `repository` is set, `chart` must be the bare chart name;
  # "stable/datadog" double-qualifies the repo and makes the provider look
  # for a chart literally named "stable/datadog" inside the stable repo.
  chart      = "datadog"

  set {
    name  = "datadog.apiKey"
    value = "${var.datadog_apikey}"
  }

}

要通过 Terraform 中的 helm 在 AKS 集群中安装 nginx-ingress,我在这里展示一种可用的方法。这样,你需要在运行 terraform 脚本的机器上安装 helm,然后为你的 AKS 集群配置 helm(参见 Configure the helm to AKS 中的步骤)。你可以通过向 AKS 安装一些东西来检查 helm 是否已配置好。

当一切准备就绪。您只需要设置 helm 提供程序并使用资源 helm_release。安装 nginx-ingress 的 Terraform 脚本显示在这里:

# Minimal helm provider: version pinned to the 0.9.x series; cluster
# connection comes from the local kubeconfig already configured for AKS.
provider "helm" {
  version = "~> 0.9"
}

# nginx-ingress from the stable repo, pinned to chart version 1.10.2, in the
# "ingress-basic" namespace with a single controller replica.
# NOTE(review): the "..." below is the answer's elision marker, not valid
# HCL — the remaining `set` blocks were omitted from the post.
resource "helm_release" "ingress" {
    name = "application1"
    chart = "stable/nginx-ingress"
    version = "1.10.2"
    namespace = "ingress-basic"

    set {
        name = "controller.replicaCount"
        value = "1"
    }

    ...

}

过程显示在这里:

这只是为了在 Terraform 中通过 helm 安装 nginx-ingress。如果你想创建 kubernetes 资源,可以在 Terraform 中使用 kubernetes 提供程序。

更新:

好的,要使用另一个资源组中的静态 public IP 作为您的入口,您需要再执行两个步骤。

  1. 必须对 AKS 集群使用的服务主体授予 public IP 所在的另一个资源组的权限,权限至少应为 "Network Contributor"(网络参与者)。
  2. 使用 public IP 所在的资源组的值设置入口服务注释。

yaml 文件中的注释如下:

annotations:
    service.beta.kubernetes.io/azure-load-balancer-resource-group: myResourceGroup

有关详细信息,请参阅 Use a static IP address outside of the node resource group

更新1:

"helm_release"中的代码:

# nginx-ingress release (chart 1.10.2, namespace ingress-basic) configured to
# use a static public IP that lives in a different resource group.
resource "helm_release" "ingress" {
  name      = "application1223"
  chart     = "stable/nginx-ingress"
  version   = "1.10.2"
  namespace = "ingress-basic"

  # Run a single controller replica.
  set {
    name = "controller.replicaCount"
    value = "1"
  }

  # Tell Azure's cloud provider which resource group contains the public IP;
  # dots in the annotation key are escaped so helm keeps them literal.
  set {
    name = "controller.service.annotations.\"service\.beta\.kubernetes\.io/azure-load-balancer-resource-group\""
    value = "v-chaxu-xxxx"
  }

  # Pin the LoadBalancer service to the pre-allocated static IP.
  set {
    name = "controller.service.loadBalancerIP"
    value = "13.68.175.40"
  }

}

部署成功后,ingress服务显示如下:

在另一个资源组的public IP的信息:

我遇到了同样的问题(helm_release 超时)。在进一步调查中,我发现 Public IP 没有被分配给负载均衡器(kubectl describe svc nginx-ingress -n ingress-basic),因为 RBAC 权限不正确。

我正在使用 Azure AKS Managed Identity 功能,Azure 通过该功能自动创建一个只有非常有限权限的托管身份服务主体(对 AKS 自动创建的托管集群资源组的只读权限)。我的公共 IP 在另一个资源组中,负载均衡器在 AKS 群集的托管资源组中。

最后,我通过在 AKS 群集中改用 'Service Principal' 选项(而不是托管身份),并为该服务主体授予对订阅的 'Contributor'(参与者)访问权限,解决了这个问题。

因此,如果有人在使用托管身份时遇到同样的问题,请尝试改用对订阅具有 Contributor 访问权限的服务主体,即可解决该问题。