无法使用 Terraform 为 AKS 集群创建命名空间,报告没有此类主机
Unable to create a namespace for AKS cluster using Terraform reports no such host
我有一个模块定义如下:
===
providers.tf
# Kubernetes provider, configured from the connection details that the AKS
# cluster resource declared in this module exports through its kube_config block.
provider "kubernetes" {
  # load_config_file = "false"

  # API server endpoint and basic credentials.
  host     = azurerm_kubernetes_cluster.aks.kube_config[0].host
  username = azurerm_kubernetes_cluster.aks.kube_config[0].username
  password = azurerm_kubernetes_cluster.aks.kube_config[0].password

  # Certificate material is run through base64decode before being handed to the provider.
  client_certificate     = base64decode(azurerm_kubernetes_cluster.aks.kube_config[0].client_certificate)
  client_key             = base64decode(azurerm_kubernetes_cluster.aks.kube_config[0].client_key)
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config[0].cluster_ca_certificate)
}
outputs.tf
# Name of the resource group that holds the AKS node resources.
output "node_resource_group" {
  description = "The name of resource group where the AKS Nodes are created"
  value       = azurerm_kubernetes_cluster.aks.node_resource_group
}

# Raw kubeconfig. It embeds credentials, so it is marked sensitive to keep it
# out of plan/apply console output.
output "kubeConfig" {
  description = "Kubeconfig of AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config_raw
  sensitive   = true
}

# API server endpoint of the cluster.
output "host" {
  description = "Kubernetes API server host of the AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config.0.host
}

# Client private key as exported by the cluster resource (not decoded here).
output "client_key" {
  description = "Client key used to authenticate to the AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config.0.client_key
  sensitive   = true
}

# Client certificate as exported by the cluster resource (not decoded here).
output "client_certificate" {
  description = "Client certificate used to authenticate to the AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate
  sensitive   = true
}

# Same value as "kubeConfig"; both names are kept so existing callers of
# either output continue to work.
output "kube_config" {
  description = "Kubeconfig of AKS Cluster (alias of kubeConfig)"
  value       = azurerm_kubernetes_cluster.aks.kube_config_raw
  sensitive   = true
}

# Cluster CA certificate as exported by the cluster resource (not decoded here).
output "cluster_ca_certificate" {
  description = "CA certificate of the AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate
  sensitive   = true
}
main.tf
# Log Analytics workspace created in the same resource group and location as
# the data-sourced resource group; its name is built from the TLA, the
# one-letter environment code and the identifier.
# NOTE(review): the oms_agent add-on visible in this module points at the
# "log_analytics" data source rather than at this workspace - confirm intent.
resource "azurerm_log_analytics_workspace" "law" {
  name                = "${var.tla}-la-${local.lookup_result}-${var.identifier}"
  resource_group_name = data.azurerm_resource_group.rg.name
  location            = data.azurerm_resource_group.rg.location

  sku               = var.la_sku
  retention_in_days = 30
}
# AKS cluster. The name is built from the TLA, the one-letter environment code
# (local.lookup_result) and the identifier; location and resource group come
# from the "rg" data source.
resource "azurerm_kubernetes_cluster" "aks" {
name = "${var.tla}-aks-${local.lookup_result}-${var.identifier}"
location = data.azurerm_resource_group.rg.location
resource_group_name = data.azurerm_resource_group.rg.name
dns_prefix = var.dns_prefix
kubernetes_version = var.kubernetes_version
sku_tier = var.sku_tier
# Driven by var.enable_private_cluster, whose declared default is true.
private_cluster_enabled = var.enable_private_cluster
#api_server_authorized_ip_ranges = ""
# System node pool with a fixed name; autoscaling is always on and bounded by
# the default_pool_min/max_node_count variables.
default_node_pool {
name = "syspool001"
orchestrator_version = var.orchestrator_version
availability_zones = var.agents_availability_zones
enable_auto_scaling = true
node_count = var.default_pool_node_count
max_count = var.default_pool_max_node_count
min_count = var.default_pool_min_node_count
max_pods = var.default_pool_max_pod_count
vm_size = var.agents_size
enable_node_public_ip = false
os_disk_size_gb = var.default_pool_os_disk_size_gb
type = "VirtualMachineScaleSets"
vnet_subnet_id = var.vnet_subnet_id
node_labels = var.agents_labels
# Pool tags are the module-wide tags with var.agents_tags merged on top.
tags = merge(local.tags, var.agents_tags)
}
network_profile {
network_plugin = var.network_plugin
network_policy = var.network_policy
dns_service_ip = var.net_profile_dns_service_ip
docker_bridge_cidr = var.net_profile_docker_bridge_cidr
service_cidr = var.net_profile_service_cidr
}
# RBAC enabled with managed Azure AD integration; admin groups come from
# var.rbac_aad_admin_group_object_ids.
role_based_access_control {
enabled = true
azure_active_directory {
managed = true
admin_group_object_ids = var.rbac_aad_admin_group_object_ids
}
}
identity {
type = "SystemAssigned"
}
addon_profile {
azure_policy {
enabled = true
}
http_application_routing {
enabled = false
}
# The monitoring agent reports to the workspace looked up by the
# "log_analytics" data source, not to the "law" resource declared in this module.
oms_agent {
enabled = true
log_analytics_workspace_id = data.azurerm_log_analytics_workspace.log_analytics.id
}
}
tags = local.tags
# Terraform ignores every difference inside default_node_pool after creation.
# NOTE(review): presumably meant to tolerate autoscaler changes to node_count - confirm.
lifecycle {
ignore_changes = [
default_node_pool
]
}
}
# One additional node pool per entry of var.additional_node_pools, attached to
# the AKS cluster declared in this module.
resource "azurerm_kubernetes_cluster_node_pool" "aksnp" {
  for_each = var.additional_node_pools

  kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id

  # The pool name is the map key, cut to 6 characters for Windows pools and
  # to 12 characters for every other OS.
  name = substr(each.key, 0, each.value.node_os == "Windows" ? 6 : 12)

  # Node shape
  os_type         = each.value.node_os
  vm_size         = each.value.vm_size
  os_disk_size_gb = each.value.os_disk_size_gb
  max_pods        = each.value.max_pods
  node_taints     = each.value.taints

  # Placement
  availability_zones = each.value.zones
  vnet_subnet_id     = var.vnet_subnet_id

  # Scaling
  node_count          = each.value.node_count
  enable_auto_scaling = each.value.cluster_auto_scaling
  min_count           = each.value.cluster_auto_scaling_min_count
  max_count           = each.value.cluster_auto_scaling_max_count

  # Changes to node_count after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [
      node_count
    ]
  }
}
# Kubernetes namespace named after var.namespace, created through the
# "kubernetes" provider configured in this module.
resource "kubernetes_namespace" "aks-namespace" {
  metadata {
    name = var.namespace
  }
}
data.tf
# Looks up the existing resource group named by var.resource_group_name; its
# name, location and tags are read elsewhere in this module.
data "azurerm_resource_group" "rg" {
  name = var.resource_group_name
}
lookups.tf
locals {
  # Maps an environment name to the one-letter code used in resource names.
  environment_lookup = {
    dev  = "d"
    test = "t"
    int  = "i"
    prod = "p"
    prd  = "p"
    uat  = "a"
    poc  = "d"
    dr   = "r"
    lab  = "l"
  }

  # Code for the selected environment; evaluation fails when
  # var.environment is not a key of the map above.
  lookup_result = local.environment_lookup[var.environment]

  # Tags of the resource group with the module's fixed tags merged on top.
  tags = merge(
    data.azurerm_resource_group.rg.tags,
    {
      Directory      = "tectcompany.com"
      PrivateDNSZone = var.private_dns_zone
      Immutable      = "False"
      ManagedOS      = "True"
    }
  )
}
# Existing Log Analytics workspace used by the cluster's oms_agent add-on.
# The workspace and resource group names can be overridden through the
# override_log_analytics_* variables; when those are left at null the
# previous hard-coded names are used, so existing callers see no change.
data "azurerm_log_analytics_workspace" "log_analytics" {
  name                = coalesce(var.override_log_analytics_workspace, "abc-az-lad2")
  resource_group_name = coalesce(var.override_log_analytics_resource_group_name, "abc-dev-aae")
}
variables.tf
variable "secondary_region" {
description = "Is this resource being deployed into the secondary (pair) region?"
default = false
type = bool
}
variable "override_log_analytics_workspace" {
description = "Override the vm log analytics workspace"
type = string
default = null
}
variable "override_log_analytics_resource_group_name" {
description = "Overrides the log analytics resource group name"
type = string
default = null
}
# Environment name; it is translated to a one-letter code through
# local.environment_lookup, so only the keys of that map are accepted.
variable "environment" {
  description = "The name of environment for the AKS Cluster"
  type        = string
  default     = "dev"

  # Fail early with a readable message instead of a lookup error during evaluation.
  # Keep this list in sync with the keys of local.environment_lookup.
  validation {
    condition     = contains(["dev", "test", "int", "prod", "prd", "uat", "poc", "dr", "lab"], var.environment)
    error_message = "The environment must be one of: dev, test, int, prod, prd, uat, poc, dr, lab."
  }
}
# Suffix appended to resource names. Declared as a string so that leading
# zeros survive: with type = number the default "001" was converted to 1 and
# names ended in "-1" instead of "-001". Numeric values passed by callers are
# still accepted and converted to their string form.
variable "identifier" {
  description = "The identifier for the AKS Cluster"
  type        = string
  default     = "001"
}
variable "kubernetes_version" {
description = "Specify which Kubernetes release to use. The default used is the latest Kubernetes version available in the region"
type = string
default = "1.19.9"
}
variable "dns_prefix" {
description = "The dns prefix for the AKS Cluster"
type = string
default = "odessa-sandpit"
}
variable "orchestrator_version" {
description = "Specify which Kubernetes release to use for the orchestration layer. The default used is the latest Kubernetes version available in the region"
type = string
default = null
}
variable "agents_availability_zones" {
description = "(Optional) A list of Availability Zones across which the Node Pool should be spread. Changing this forces a new resource to be created."
type = list(string)
default = null
}
variable "agents_size" {
default = "Standard_D4s_v3"
description = "The default virtual machine size for the Kubernetes agents"
type = string
}
variable "vnet_subnet_id" {
description = "(Optional) The ID of a Subnet where the Kubernetes Node Pool should exist. Changing this forces a new resource to be created."
type = string
default = null
}
variable "agents_labels" {
description = "(Optional) A map of Kubernetes labels which should be applied to nodes in the Default Node Pool. Changing this forces a new resource to be created."
type = map(string)
default = {}
}
variable "agents_tags" {
description = "(Optional) A mapping of tags to assign to the Node Pool."
type = map(string)
default = {}
}
variable "net_profile_dns_service_ip" {
description = "(Optional) IP address within the Kubernetes service address range that will be used by cluster service discovery (kube-dns). Changing this forces a new resource to be created."
type = string
default = null
}
variable "net_profile_docker_bridge_cidr" {
description = "(Optional) IP address (in CIDR notation) used as the Docker bridge IP address on nodes. Changing this forces a new resource to be created."
type = string
default = null
}
variable "net_profile_service_cidr" {
description = "(Optional) The Network Range used by the Kubernetes service. Changing this forces a new resource to be created."
type = string
default = null
}
variable "rbac_aad_admin_group_object_ids" {
description = "Object ID of groups with admin access."
type = list(string)
default = null
}
variable "network_policy" {
description = "(Optional) The Network Policy to be used by the network profile of Azure Kubernetes Cluster."
type = string
default = "azure"
}
variable "network_plugin" {
description = "(Optional) The Network Plugin to be used by the network profile of Azure Kubernetes Cluster."
type = string
default = "azure"
}
# Feeds private_cluster_enabled on the AKS cluster. The explicit bool type
# rejects values that cannot be converted to true/false.
variable "enable_private_cluster" {
  description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private."
  type        = bool
  default     = true
}
variable "default_pool_node_count" {
description = "(Optional) The initial node count for the default pool of AKS Cluster"
type = number
default = 3
}
variable "default_pool_max_node_count" {
description = "(Optional) The max node count for the default pool of AKS Cluster"
type = number
default = 6
}
variable "default_pool_min_node_count" {
description = "(Optional) The min node count for the default pool of AKS Cluster"
type = number
default = 3
}
# Maximum pods per node in the default pool. The previous default of 13 was
# rejected by AKS with InsufficientAgentPoolMaxPodsPerAgentPool when the
# cluster was created; 30 is the value that let the deployment succeed.
variable "default_pool_max_pod_count" {
  description = "(Optional) The max pod count for the default pool of AKS Cluster"
  type        = number
  default     = 30
}
# OS disk size for default-pool nodes. Typed as a number to match
# os_disk_size_gb in additional_node_pools; numeric strings such as "64"
# passed by existing callers are still converted automatically.
variable "default_pool_os_disk_size_gb" {
  description = "(Optional) The size of os disk in gb for the nodes from default pool of AKS Cluster"
  type        = number
  default     = 64
}
variable "additional_node_pools" {
type = map(object({
node_count = number
max_pods = number
os_disk_size_gb = number
vm_size = string
zones = list(string)
node_os = string
taints = list(string)
cluster_auto_scaling = bool
cluster_auto_scaling_min_count = number
cluster_auto_scaling_max_count = number
}))
}
variable "sku_tier" {
description = "(Optional)The SKU Tier that should be used for this Kubernetes Cluster, possible values Free or Paid"
type = string
default = "Paid"
validation {
condition = contains(["Free", "Paid"], var.sku_tier)
error_message = "SKU_TIER can only be either Paid or Free."
}
}
# SKU of the Log Analytics workspace created by this module.
variable "la_sku" {
  description = "(Optional)The SKU Tier that should be used for Log Analytics. Multiple values are possible."
  type        = string
  default     = "PerGB2018"

  # Only the SKU names listed here are accepted.
  validation {
    condition     = contains(["Free", "PerNode", "Premium", "Standard", "Standalone", "Unlimited", "CapacityReservation", "PerGB2018"], var.la_sku)
    error_message = "The la_sku for Log Analytics must be one of Free, PerNode, Premium, Standard, Standalone, Unlimited, CapacityReservation or PerGB2018 (default)."
  }
}
variable "resource_group_name" {
description = "Resource Group for deploying AKS Cluster"
type = string
}
variable "private_dns_zone" {
description = "DNS prefix for AKS Cluster"
type = string
default = "testcluster"
}
# Three-letter application abbreviation used as the prefix of resource names.
# It has no default on purpose: the former default "" could never satisfy the
# length check below, so the variable was already effectively required.
variable "tla" {
  description = "Three Level acronym - three letter abbreviation for application"
  type        = string

  validation {
    condition     = length(var.tla) == 3
    error_message = "The TLA should be precisely three characters."
  }
}
# Name of the Kubernetes namespace this module creates; required (no default).
variable "namespace" {
  type        = string
  description = "AKS Namespace"
}
最后,我在下面调用我的模块来创建 AKS 集群、LA 和 AKS 集群的命名空间:
provider "azurerm" {
features {}
#version = "~> 2.53.0"
}
# Instantiates the AKS module: one cluster, one extra node pool and one namespace.
module "aks-cluster1" {
  source              = "../../"
  resource_group_name = "pst-aks-sandpit-dev-1"
  tla                 = "pqr"

  additional_node_pools = {
    pool1 = {
      # Numeric fields are written as numbers, matching the number types
      # declared for additional_node_pools, instead of relying on
      # string-to-number conversion.
      node_count      = 1
      max_pods        = 110
      os_disk_size_gb = 30
      vm_size         = "Standard_D8s_v3"
      zones           = ["1", "2", "3"]
      node_os         = "Linux"
      # NOTE(review): this taint mentions windows while node_os is Linux - confirm it is intended.
      taints                         = ["kubernetes.io/os=windows:NoSchedule"]
      cluster_auto_scaling           = true
      cluster_auto_scaling_min_count = 2
      cluster_auto_scaling_max_count = 4
    }
  }

  namespace = "sample-ns"
}
问题:
当 terraform 尝试创建集群时,我得到一个错误,提示没有这样的主机。
我认为它无法连接到集群,但我可能错了。不知道内部是怎么处理的
Error: Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces": dial tcp: lookup testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io: no such host
很难说是什么问题,因为您发布的代码不完整。首先,您不应该这样做:
provider "kubernetes" {
config_path = "~/.kube/config"
}
您发布的 AKS URL 并不存在,因此我认为它是从您的 kube 配置中读取了某个旧集群的默认值。
感谢您提供更多详细信息。我在这里看到了一些问题。第一个是您当前问题的核心:
variable "enable_private_cluster" {
description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private."
default = true
}
您的集群部署采用了此处的默认值(true),因此它的 API 端点是 privatelink.australiaeast.azmk8s.io 区域中的一条私有 DNS 记录:
Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces"
terraform kubernetes 提供程序必须能够到达 API 端点才能部署命名空间。但是,它无法解析域。为此,您需要确保:
- Azure 中存在私有 DNS 区域
- 私有 DNS 区域已链接到相关的虚拟网络,包括运行 Terraform 的主机所在的虚拟网络
- Terraform 主机上的 DNS 解析器可以通过 https://docs.microsoft.com/en-us/azure/virtual-network/what-is-ip-address-168-63-129-16 中定义的端点解析私有链接域 - 请注意,如果您的网络使用本地内部 DNS,这可能需要转发私有域。
- 您必须确保您的 Terraform 主机可以到达集群在 TCP 端口 443 上部署的 privatelink 端点
Azure privatelink 和私有 DNS 的正确配置并非易事,尤其是在复杂的网络环境中。因此,您可能会遇到我未在此处介绍的其他障碍。
或者,您可能希望通过将此模块选项设置为 false,在不使用 privatelink 的情况下部署此集群。出于安全和合规原因,这可能是不可取的,因此请确保您了解您在此处所做的事情:
enable_private_cluster = false
我遇到的下一个问题是:
Error: creating Managed Kubernetes Cluster "pqr-aks-d-1" (Resource Group "pst-aks-sandpit-dev-1"): containerservice.ManagedClustersClient#CreateOrUpdate: Failure sending request: StatusCode=0 -- Original Error: Code="InsufficientAgentPoolMaxPodsPerAgentPool" Message="The AgentPoolProfile 'syspool001' has an invalid total maxPods(maxPods per node * node count), the total maxPods(13 * 824668498368) should be larger than 30. Please refer to aka.ms/aks-min-max-pod for more detail." Target="agentPoolProfile.kubernetesConfig.kubeletConfig.maxPods"
我通过设置克服了这个问题:
default_pool_max_pod_count = 30
最后一个问题是您需要配置kubernetes provider 以具有足够的权限来部署命名空间:
│ Error: Unauthorized
│
│ with module.aks-cluster1.kubernetes_namespace.aks-namespace,
│ on ../../main.tf line 103, in resource "kubernetes_namespace" "aks-namespace":
│ 103: resource "kubernetes_namespace" "aks-namespace" {
实现这一点的一种方法是使用 kube_admin_config 而不是 kube_config:
provider "kubernetes" {
#load_config_file = "false"
host = azurerm_kubernetes_cluster.aks.kube_admin_config.0.host
username = azurerm_kubernetes_cluster.aks.kube_admin_config.0.username
password = azurerm_kubernetes_cluster.aks.kube_admin_config.0.password
client_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.client_certificate)
client_key = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.client_key)
cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.cluster_ca_certificate)
}
我是 Terraform Kubernetes 提供程序(provider)的维护者之一,我经常看到这个问题。作为一名前 DevOps 工程师,我非常理解大家在这方面不断遇到的困难。如果可能的话,我真的很想在提供程序中修复它。
您面临的问题是 Terraform 核心在“向提供程序配置块传入未知值”时的限制。引用他们的文档:
You can use expressions in the values of these configuration arguments,
but can only reference values that are known before the configuration is applied.
当您对底层基础设施(例如本例中的 AKS 集群)进行更改时,您就把一个未知值传入了 Kubernetes 提供程序的配置块,因为只有在这些更改应用到 AKS 集群之后,集群基础设施的完整状态才是已知的。
虽然我确实写了最初的指南,用来说明可以绕过其中的一些问题,但正如您从经验中发现的那样,存在许多边缘情况,使得让 Kubernetes 提供程序与底层基础设施协同工作成为一个不可靠且不直观的过程。这源于 Terraform 中一个长期存在的限制,任何提供程序都无法修复它;不过我们确实计划预先提供更好的错误消息,让过程稍微顺畅一些——在您这种情况下,这本可以让您省去一些麻烦。
为了解决这类问题,集群基础设施需要与 Kubernetes 和 Helm 提供程序的资源保存在彼此独立的状态(state)中。我这里有一个示例:它先在一次 apply 中构建 AKS 集群,然后在第二次 apply 中管理 Kubernetes/Helm 资源。您可以使用这种方法为您的特定用例构建最可靠的配置:
我知道这种两次 apply 的方式并不方便,这也是为什么我们仍在努力支持单次 apply 的场景,以及 Kubernetes 资源与集群资源处于同一个 Terraform 状态中的场景。然而,在上游 Terraform 增加对此的支持之前,单次 apply 的工作流仍然容易出错,并且不如将集群基础设施与 Kubernetes 资源分开来得可靠。
大多数情况可以通过使用 depends_on(以确保集群先于 Kubernetes 资源创建)来解决,或者通过将集群基础设施移到单独的模块中并运行 terraform state rm module.kubernetes-config 或 terraform apply -target=module.aks-cluster 来解决。但我认为鼓励这类变通方法从长远来看会带来更多麻烦,因为它要求用户自己判断何时使用这些特殊的一次性 apply 命令,而不是从一开始就把 Terraform 配置成可靠且可预测的行为。此外,它还可能产生意想不到的副作用,例如遗留下孤立的云资源(orphaning cloud resources)。
我有一个模块定义如下:
===
providers.tf
provider "kubernetes" {
#load_config_file = "false"
host = azurerm_kubernetes_cluster.aks.kube_config.0.host
username = azurerm_kubernetes_cluster.aks.kube_config.0.username
password = azurerm_kubernetes_cluster.aks.kube_config.0.password
client_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate)
client_key = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_key)
cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate)
}
outputs.tf
# Name of the resource group that holds the AKS node resources.
output "node_resource_group" {
  description = "The name of resource group where the AKS Nodes are created"
  value       = azurerm_kubernetes_cluster.aks.node_resource_group
}

# Raw kubeconfig. It embeds credentials, so it is marked sensitive to keep it
# out of plan/apply console output.
output "kubeConfig" {
  description = "Kubeconfig of AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config_raw
  sensitive   = true
}

# API server endpoint of the cluster.
output "host" {
  description = "Kubernetes API server host of the AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config.0.host
}

# Client private key as exported by the cluster resource (not decoded here).
output "client_key" {
  description = "Client key used to authenticate to the AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config.0.client_key
  sensitive   = true
}

# Client certificate as exported by the cluster resource (not decoded here).
output "client_certificate" {
  description = "Client certificate used to authenticate to the AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate
  sensitive   = true
}

# Same value as "kubeConfig"; both names are kept so existing callers of
# either output continue to work.
output "kube_config" {
  description = "Kubeconfig of AKS Cluster (alias of kubeConfig)"
  value       = azurerm_kubernetes_cluster.aks.kube_config_raw
  sensitive   = true
}

# Cluster CA certificate as exported by the cluster resource (not decoded here).
output "cluster_ca_certificate" {
  description = "CA certificate of the AKS Cluster"
  value       = azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate
  sensitive   = true
}
main.tf
resource "azurerm_log_analytics_workspace" "law" {
name = "${var.tla}-la-${local.lookup_result}-${var.identifier}"
location = data.azurerm_resource_group.rg.location
resource_group_name = data.azurerm_resource_group.rg.name
sku = var.la_sku
retention_in_days = 30
}
resource "azurerm_kubernetes_cluster" "aks" {
name = "${var.tla}-aks-${local.lookup_result}-${var.identifier}"
location = data.azurerm_resource_group.rg.location
resource_group_name = data.azurerm_resource_group.rg.name
dns_prefix = var.dns_prefix
kubernetes_version = var.kubernetes_version
sku_tier = var.sku_tier
private_cluster_enabled = var.enable_private_cluster
#api_server_authorized_ip_ranges = ""
default_node_pool {
name = "syspool001"
orchestrator_version = var.orchestrator_version
availability_zones = var.agents_availability_zones
enable_auto_scaling = true
node_count = var.default_pool_node_count
max_count = var.default_pool_max_node_count
min_count = var.default_pool_min_node_count
max_pods = var.default_pool_max_pod_count
vm_size = var.agents_size
enable_node_public_ip = false
os_disk_size_gb = var.default_pool_os_disk_size_gb
type = "VirtualMachineScaleSets"
vnet_subnet_id = var.vnet_subnet_id
node_labels = var.agents_labels
tags = merge(local.tags, var.agents_tags)
}
network_profile {
network_plugin = var.network_plugin
network_policy = var.network_policy
dns_service_ip = var.net_profile_dns_service_ip
docker_bridge_cidr = var.net_profile_docker_bridge_cidr
service_cidr = var.net_profile_service_cidr
}
role_based_access_control {
enabled = true
azure_active_directory {
managed = true
admin_group_object_ids = var.rbac_aad_admin_group_object_ids
}
}
identity {
type = "SystemAssigned"
}
addon_profile {
azure_policy {
enabled = true
}
http_application_routing {
enabled = false
}
oms_agent {
enabled = true
log_analytics_workspace_id = data.azurerm_log_analytics_workspace.log_analytics.id
}
}
tags = local.tags
lifecycle {
ignore_changes = [
default_node_pool
]
}
}
resource "azurerm_kubernetes_cluster_node_pool" "aksnp" {
lifecycle {
ignore_changes = [
node_count
]
}
for_each = var.additional_node_pools
kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id
name = each.value.node_os == "Windows" ? substr(each.key, 0, 6) : substr(each.key, 0, 12)
node_count = each.value.node_count
vm_size = each.value.vm_size
availability_zones = each.value.zones
max_pods = each.value.max_pods
os_disk_size_gb = each.value.os_disk_size_gb
os_type = each.value.node_os
vnet_subnet_id = var.vnet_subnet_id
node_taints = each.value.taints
enable_auto_scaling = each.value.cluster_auto_scaling
min_count = each.value.cluster_auto_scaling_min_count
max_count = each.value.cluster_auto_scaling_max_count
}
resource "kubernetes_namespace" "aks-namespace" {
metadata {
name = var.namespace
}
}
data.tf
data "azurerm_resource_group" "rg" {
name = var.resource_group_name
}
lookups.tf
locals {
environment_lookup = {
dev = "d"
test = "t"
int = "i"
prod = "p"
prd = "p"
uat = "a"
poc = "d"
dr = "r"
lab = "l"
}
lookup_result = lookup(local.environment_lookup, var.environment)
tags = merge(
data.azurerm_resource_group.rg.tags, {
Directory = "tectcompany.com",
PrivateDNSZone = var.private_dns_zone,
Immutable = "False",
ManagedOS = "True",
}
)
}
# Existing Log Analytics workspace used by the cluster's oms_agent add-on.
# The workspace and resource group names can be overridden through the
# override_log_analytics_* variables; when those are left at null the
# previous hard-coded names are used, so existing callers see no change.
data "azurerm_log_analytics_workspace" "log_analytics" {
  name                = coalesce(var.override_log_analytics_workspace, "abc-az-lad2")
  resource_group_name = coalesce(var.override_log_analytics_resource_group_name, "abc-dev-aae")
}
variables.tf
variable "secondary_region" {
description = "Is this resource being deployed into the secondary (pair) region?"
default = false
type = bool
}
variable "override_log_analytics_workspace" {
description = "Override the vm log analytics workspace"
type = string
default = null
}
variable "override_log_analytics_resource_group_name" {
description = "Overrides the log analytics resource group name"
type = string
default = null
}
# Environment name; it is translated to a one-letter code through
# local.environment_lookup, so only the keys of that map are accepted.
variable "environment" {
  description = "The name of environment for the AKS Cluster"
  type        = string
  default     = "dev"

  # Fail early with a readable message instead of a lookup error during evaluation.
  # Keep this list in sync with the keys of local.environment_lookup.
  validation {
    condition     = contains(["dev", "test", "int", "prod", "prd", "uat", "poc", "dr", "lab"], var.environment)
    error_message = "The environment must be one of: dev, test, int, prod, prd, uat, poc, dr, lab."
  }
}
# Suffix appended to resource names. Declared as a string so that leading
# zeros survive: with type = number the default "001" was converted to 1 and
# names ended in "-1" instead of "-001". Numeric values passed by callers are
# still accepted and converted to their string form.
variable "identifier" {
  description = "The identifier for the AKS Cluster"
  type        = string
  default     = "001"
}
variable "kubernetes_version" {
description = "Specify which Kubernetes release to use. The default used is the latest Kubernetes version available in the region"
type = string
default = "1.19.9"
}
variable "dns_prefix" {
description = "The dns prefix for the AKS Cluster"
type = string
default = "odessa-sandpit"
}
variable "orchestrator_version" {
description = "Specify which Kubernetes release to use for the orchestration layer. The default used is the latest Kubernetes version available in the region"
type = string
default = null
}
variable "agents_availability_zones" {
description = "(Optional) A list of Availability Zones across which the Node Pool should be spread. Changing this forces a new resource to be created."
type = list(string)
default = null
}
variable "agents_size" {
default = "Standard_D4s_v3"
description = "The default virtual machine size for the Kubernetes agents"
type = string
}
variable "vnet_subnet_id" {
description = "(Optional) The ID of a Subnet where the Kubernetes Node Pool should exist. Changing this forces a new resource to be created."
type = string
default = null
}
variable "agents_labels" {
description = "(Optional) A map of Kubernetes labels which should be applied to nodes in the Default Node Pool. Changing this forces a new resource to be created."
type = map(string)
default = {}
}
variable "agents_tags" {
description = "(Optional) A mapping of tags to assign to the Node Pool."
type = map(string)
default = {}
}
variable "net_profile_dns_service_ip" {
description = "(Optional) IP address within the Kubernetes service address range that will be used by cluster service discovery (kube-dns). Changing this forces a new resource to be created."
type = string
default = null
}
variable "net_profile_docker_bridge_cidr" {
description = "(Optional) IP address (in CIDR notation) used as the Docker bridge IP address on nodes. Changing this forces a new resource to be created."
type = string
default = null
}
variable "net_profile_service_cidr" {
description = "(Optional) The Network Range used by the Kubernetes service. Changing this forces a new resource to be created."
type = string
default = null
}
variable "rbac_aad_admin_group_object_ids" {
description = "Object ID of groups with admin access."
type = list(string)
default = null
}
variable "network_policy" {
description = "(Optional) The Network Policy to be used by the network profile of Azure Kubernetes Cluster."
type = string
default = "azure"
}
variable "network_plugin" {
description = "(Optional) The Network Plugin to be used by the network profile of Azure Kubernetes Cluster."
type = string
default = "azure"
}
# Feeds private_cluster_enabled on the AKS cluster. The explicit bool type
# rejects values that cannot be converted to true/false.
variable "enable_private_cluster" {
  description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private."
  type        = bool
  default     = true
}
variable "default_pool_node_count" {
description = "(Optional) The initial node count for the default pool of AKS Cluster"
type = number
default = 3
}
variable "default_pool_max_node_count" {
description = "(Optional) The max node count for the default pool of AKS Cluster"
type = number
default = 6
}
variable "default_pool_min_node_count" {
description = "(Optional) The min node count for the default pool of AKS Cluster"
type = number
default = 3
}
# Maximum pods per node in the default pool. The previous default of 13 was
# rejected by AKS with InsufficientAgentPoolMaxPodsPerAgentPool when the
# cluster was created; 30 is the value that let the deployment succeed.
variable "default_pool_max_pod_count" {
  description = "(Optional) The max pod count for the default pool of AKS Cluster"
  type        = number
  default     = 30
}
# OS disk size for default-pool nodes. Typed as a number to match
# os_disk_size_gb in additional_node_pools; numeric strings such as "64"
# passed by existing callers are still converted automatically.
variable "default_pool_os_disk_size_gb" {
  description = "(Optional) The size of os disk in gb for the nodes from default pool of AKS Cluster"
  type        = number
  default     = 64
}
variable "additional_node_pools" {
type = map(object({
node_count = number
max_pods = number
os_disk_size_gb = number
vm_size = string
zones = list(string)
node_os = string
taints = list(string)
cluster_auto_scaling = bool
cluster_auto_scaling_min_count = number
cluster_auto_scaling_max_count = number
}))
}
variable "sku_tier" {
description = "(Optional)The SKU Tier that should be used for this Kubernetes Cluster, possible values Free or Paid"
type = string
default = "Paid"
validation {
condition = contains(["Free", "Paid"], var.sku_tier)
error_message = "SKU_TIER can only be either Paid or Free."
}
}
# SKU of the Log Analytics workspace created by this module.
variable "la_sku" {
  description = "(Optional)The SKU Tier that should be used for Log Analytics. Multiple values are possible."
  type        = string
  default     = "PerGB2018"

  # Only the SKU names listed here are accepted.
  validation {
    condition     = contains(["Free", "PerNode", "Premium", "Standard", "Standalone", "Unlimited", "CapacityReservation", "PerGB2018"], var.la_sku)
    error_message = "The la_sku for Log Analytics must be one of Free, PerNode, Premium, Standard, Standalone, Unlimited, CapacityReservation or PerGB2018 (default)."
  }
}
variable "resource_group_name" {
description = "Resource Group for deploying AKS Cluster"
type = string
}
variable "private_dns_zone" {
description = "DNS prefix for AKS Cluster"
type = string
default = "testcluster"
}
# Three-letter application abbreviation used as the prefix of resource names.
# It has no default on purpose: the former default "" could never satisfy the
# length check below, so the variable was already effectively required.
variable "tla" {
  description = "Three Level acronym - three letter abbreviation for application"
  type        = string

  validation {
    condition     = length(var.tla) == 3
    error_message = "The TLA should be precisely three characters."
  }
}
variable "namespace"{
description = "AKS Namespace"
type = string
}
最后,我在下面调用我的模块来创建 AKS 集群、LA 和 AKS 集群的命名空间:
provider "azurerm" {
features {}
#version = "~> 2.53.0"
}
# Instantiates the AKS module: one cluster, one extra node pool and one namespace.
module "aks-cluster1" {
  source              = "../../"
  resource_group_name = "pst-aks-sandpit-dev-1"
  tla                 = "pqr"

  additional_node_pools = {
    pool1 = {
      # Numeric fields are written as numbers, matching the number types
      # declared for additional_node_pools, instead of relying on
      # string-to-number conversion.
      node_count      = 1
      max_pods        = 110
      os_disk_size_gb = 30
      vm_size         = "Standard_D8s_v3"
      zones           = ["1", "2", "3"]
      node_os         = "Linux"
      # NOTE(review): this taint mentions windows while node_os is Linux - confirm it is intended.
      taints                         = ["kubernetes.io/os=windows:NoSchedule"]
      cluster_auto_scaling           = true
      cluster_auto_scaling_min_count = 2
      cluster_auto_scaling_max_count = 4
    }
  }

  namespace = "sample-ns"
}
问题: 当 terraform 尝试创建集群时,我得到一个错误,提示没有这样的主机。
我认为它无法连接到集群,但我可能错了。不知道内部是怎么处理的
Error: Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces": dial tcp: lookup testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io: no such host
很难说是什么问题,因为您发布的代码不完整。首先,您不应该这样做:
provider "kubernetes" {
config_path = "~/.kube/config"
}
您发布的 AKS URL 并不存在,因此我认为它是从您的 kube 配置中读取了某个旧集群的默认值。
感谢您提供更多详细信息。我在这里看到了一些问题。第一个是您当前问题的核心:
variable "enable_private_cluster" {
description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private."
default = true
}
您的集群部署采用了此处的默认值(true),因此它的 API 端点是 privatelink.australiaeast.azmk8s.io 区域中的一条私有 DNS 记录:
Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces"
terraform kubernetes 提供程序必须能够到达 API 端点才能部署命名空间。但是,它无法解析域。为此,您需要确保:
- Azure 中存在私有 DNS 区域
- 私有 DNS 区域已链接到相关的虚拟网络,包括运行 Terraform 的主机所在的虚拟网络
- Terraform 主机上的 DNS 解析器可以通过 https://docs.microsoft.com/en-us/azure/virtual-network/what-is-ip-address-168-63-129-16 中定义的端点解析私有链接域 - 请注意,如果您的网络使用本地内部 DNS,这可能需要转发私有域。
- 您必须确保您的 Terraform 主机可以到达集群在 TCP 端口 443 上部署的 privatelink 端点
Azure privatelink 和私有 DNS 的正确配置并非易事,尤其是在复杂的网络环境中。因此,您可能会遇到我未在此处介绍的其他障碍。
或者,您可能希望通过将此模块选项设置为 false,在不使用 privatelink 的情况下部署此集群。出于安全和合规原因,这可能是不可取的,因此请确保您了解您在此处所做的事情:
enable_private_cluster = false
我遇到的下一个问题是:
Error: creating Managed Kubernetes Cluster "pqr-aks-d-1" (Resource Group "pst-aks-sandpit-dev-1"): containerservice.ManagedClustersClient#CreateOrUpdate: Failure sending request: StatusCode=0 -- Original Error: Code="InsufficientAgentPoolMaxPodsPerAgentPool" Message="The AgentPoolProfile 'syspool001' has an invalid total maxPods(maxPods per node * node count), the total maxPods(13 * 824668498368) should be larger than 30. Please refer to aka.ms/aks-min-max-pod for more detail." Target="agentPoolProfile.kubernetesConfig.kubeletConfig.maxPods"
我通过设置克服了这个问题:
default_pool_max_pod_count = 30
最后一个问题是您需要配置kubernetes provider 以具有足够的权限来部署命名空间:
│ Error: Unauthorized
│
│ with module.aks-cluster1.kubernetes_namespace.aks-namespace,
│ on ../../main.tf line 103, in resource "kubernetes_namespace" "aks-namespace":
│ 103: resource "kubernetes_namespace" "aks-namespace" {
实现这一点的一种方法是使用 kube_admin_config 而不是 kube_config:
provider "kubernetes" {
#load_config_file = "false"
host = azurerm_kubernetes_cluster.aks.kube_admin_config.0.host
username = azurerm_kubernetes_cluster.aks.kube_admin_config.0.username
password = azurerm_kubernetes_cluster.aks.kube_admin_config.0.password
client_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.client_certificate)
client_key = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.client_key)
cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_admin_config.0.cluster_ca_certificate)
}
我是 Terraform Kubernetes 提供程序(provider)的维护者之一,我经常看到这个问题。作为一名前 DevOps 工程师,我非常理解大家在这方面不断遇到的困难。如果可能的话,我真的很想在提供程序中修复它。
您面临的问题是 Terraform 核心在“向提供程序配置块传入未知值”时的限制。引用他们的文档:
You can use expressions in the values of these configuration arguments,
but can only reference values that are known before the configuration is applied.
当您对底层基础设施(例如本例中的 AKS 集群)进行更改时,您就把一个未知值传入了 Kubernetes 提供程序的配置块,因为只有在这些更改应用到 AKS 集群之后,集群基础设施的完整状态才是已知的。
虽然我确实写了最初的指南,用来说明可以绕过其中的一些问题,但正如您从经验中发现的那样,存在许多边缘情况,使得让 Kubernetes 提供程序与底层基础设施协同工作成为一个不可靠且不直观的过程。这源于 Terraform 中一个长期存在的限制,任何提供程序都无法修复它;不过我们确实计划预先提供更好的错误消息,让过程稍微顺畅一些——在您这种情况下,这本可以让您省去一些麻烦。
为了解决这类问题,集群基础设施需要与 Kubernetes 和 Helm 提供程序的资源保存在彼此独立的状态(state)中。我这里有一个示例:它先在一次 apply 中构建 AKS 集群,然后在第二次 apply 中管理 Kubernetes/Helm 资源。您可以使用这种方法为您的特定用例构建最可靠的配置:
我知道这种两次 apply 的方式并不方便,这也是为什么我们仍在努力支持单次 apply 的场景,以及 Kubernetes 资源与集群资源处于同一个 Terraform 状态中的场景。然而,在上游 Terraform 增加对此的支持之前,单次 apply 的工作流仍然容易出错,并且不如将集群基础设施与 Kubernetes 资源分开来得可靠。
大多数情况可以通过使用 depends_on(以确保集群先于 Kubernetes 资源创建)来解决,或者通过将集群基础设施移到单独的模块中并运行 terraform state rm module.kubernetes-config 或 terraform apply -target=module.aks-cluster 来解决。但我认为鼓励这类变通方法从长远来看会带来更多麻烦,因为它要求用户自己判断何时使用这些特殊的一次性 apply 命令,而不是从一开始就把 Terraform 配置成可靠且可预测的行为。此外,它还可能产生意想不到的副作用,例如遗留下孤立的云资源(orphaning cloud resources)。