为什么 Terraform 强制更换极光全局数据库?
Why is terraform forcing replacement of aurora global database?
Terraform CLI 和 Terraform AWS 提供商版本
- 安装自 https://releases.hashicorp.com/terraform/0.13.5/terraform_0.13.5_linux_amd64.zip
- hashicorp/aws v3.15.0
受影响的资源
- aws_rds_cluster
- aws_rds_cluster_实例
Terraform 配置文件
# inside ./modules/rds/main.tf
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
}
}
required_version = "~> 0.13"
}
provider "aws" {
alias = "primary"
}
provider "aws" {
alias = "dr"
}
locals {
region_tags = ["primary", "dr"]
db_name = "${var.project_name}-${var.stage}-db"
db_cluster_0 = "${local.db_name}-cluster-${local.region_tags[0]}"
db_cluster_1 = "${local.db_name}-cluster-${local.region_tags[1]}"
db_instance_name = "${local.db_name}-instance"
}
resource "aws_rds_global_cluster" "global_db" {
global_cluster_identifier = "${var.project_name}-${var.stage}"
database_name = "${var.project_name}${var.stage}db"
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
// force_destroy = true
}
resource "aws_rds_cluster" "primary_cluster" {
depends_on = [aws_rds_global_cluster.global_db]
provider = aws.primary
cluster_identifier = "${local.db_name}-cluster-${local.region_tags[0]}"
# the database name does not allow dashes:
database_name = "${var.project_name}${var.stage}db"
# The engine and engine_version must be repeated in aws_rds_global_cluster,
# aws_rds_cluster, and aws_rds_cluster_instance to
# avoid "Value for engine should match" error
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
engine_mode = "global"
global_cluster_identifier = aws_rds_global_cluster.global_db.id
# backtrack and multi-master not supported by Aurora Global.
master_username = var.username
master_password = var.password
backup_retention_period = 5
preferred_backup_window = "07:00-09:00"
db_subnet_group_name = aws_db_subnet_group.primary.id
# We must have these values, because destroying or rolling back requires them
skip_final_snapshot = true
final_snapshot_identifier = "ci-aurora-cluster-backup"
tags = {
Name = local.db_cluster_0
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_rds_cluster_instance" "primary" {
depends_on = [aws_rds_global_cluster.global_db]
provider = aws.primary
cluster_identifier = aws_rds_cluster.primary_cluster.id
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
instance_class = "db.${var.instance_class}.${var.instance_size}"
db_subnet_group_name = aws_db_subnet_group.primary.id
tags = {
Name = local.db_instance_name
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_rds_cluster" "dr_cluster" {
depends_on = [aws_rds_cluster_instance.primary, aws_rds_global_cluster.global_db]
provider = aws.dr
cluster_identifier = "${local.db_name}-cluster-${local.region_tags[1]}"
# db name now allowed to specified on secondary regions
# The engine and engine_version must be repeated in aws_rds_global_cluster,
# aws_rds_cluster, and aws_rds_cluster_instance to
# avoid "Value for engine should match" error
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
engine_mode = "global"
global_cluster_identifier = aws_rds_global_cluster.global_db.id
# backtrack and multi-master not supported by Aurora Global.
# cannot specify username/password in cross-region replication cluster:
backup_retention_period = 5
preferred_backup_window = "07:00-09:00"
db_subnet_group_name = aws_db_subnet_group.dr.id
# We must have these values, because destroying or rolling back requires them
skip_final_snapshot = true
final_snapshot_identifier = "ci-aurora-cluster-backup"
tags = {
Name = local.db_cluster_1
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_rds_cluster_instance" "dr_instance" {
depends_on = [aws_rds_cluster_instance.primary, aws_rds_global_cluster.global_db]
provider = aws.dr
cluster_identifier = aws_rds_cluster.dr_cluster.id
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
instance_class = "db.${var.instance_class}.${var.instance_size}"
db_subnet_group_name = aws_db_subnet_group.dr.id
tags = {
Name = local.db_instance_name
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_db_subnet_group" "primary" {
name = "${local.db_name}-subnetgroup"
subnet_ids = var.subnet_ids
provider = aws.primary
tags = {
Name = "primary_subnet_group"
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_db_subnet_group" "dr" {
provider = aws.dr
name = "${local.db_name}-subnetgroup"
subnet_ids = var.dr_subnet_ids
tags = {
Name = "dr_subnet_group"
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_rds_cluster_parameter_group" "default" {
name = "rds-cluster-pg"
family = "aurora-mysql${var.mysql_version}"
description = "RDS default cluster parameter group"
parameter {
name = "character_set_server"
value = "utf8"
}
parameter {
name = "character_set_client"
value = "utf8"
}
parameter {
name = "aurora_parallel_query"
value = "ON"
apply_method = "pending-reboot"
}
}
在 ./modules/sns/main.tf
中,这是我在 ./modules
目录中调用 terraform apply
时添加的资源:
resource "aws_sns_topic" "foo_topic" {
name = "foo-${var.stage}-${var.topic_name}"
tags = {
Name = "foo-${var.stage}-${var.topic_name}"
Stage = var.stage
CreatedBy = var.created_by
CreatedOn = timestamp()
}
}
./modules/main.tf
:
terraform {
backend "s3" {
bucket = "terraform-remote-state-s3-bucket-unique-name"
key = "terraform.tfstate"
region = "us-east-2"
dynamodb_table = "TerraformLockTable"
}
}
provider "aws" {
alias = "primary"
region = var.region
}
provider "aws" {
alias = "dr"
region = var.dr_region
}
module "vpc" {
stage = var.stage
source = "./vpc"
providers = {
aws = aws.primary
}
}
module "dr_vpc" {
stage = var.stage
source = "./vpc"
providers = {
aws = aws.dr
}
}
module "vpc_security_group" {
source = "./vpc_security_group"
vpc_id = module.vpc.vpc_id
providers = {
aws = aws.primary
}
}
module "rds" {
source = "./rds"
stage = var.stage
created_by = var.created_by
vpc_id = module.vpc.vpc_id
subnet_ids = [module.vpc.subnet_a_id, module.vpc.subnet_b_id, module.vpc.subnet_c_id]
dr_subnet_ids = [module.dr_vpc.subnet_a_id, module.dr_vpc.subnet_b_id, module.dr_vpc.subnet_c_id]
region = var.region
username = var.rds_username
password = var.rds_password
providers = {
aws.primary = aws.primary
aws.dr = aws.dr
}
}
module "sns_start" {
stage = var.stage
source = "./sns"
topic_name = "start"
created_by = var.created_by
}
./modules/variables.tf
:
variable "region" {
default = "us-east-2"
}
variable "dr_region" {
default = "us-west-2"
}
variable "service" {
type = string
default = "foo-back"
description = "service to match what serverless framework deploys"
}
variable "stage" {
type = string
default = "sandbox"
description = "The stage to deploy: sandbox, dev, qa, uat, or prod"
validation {
condition = can(regex("sandbox|dev|qa|uat|prod", var.stage))
error_message = "The stage value must be a valid stage: sandbox, dev, qa, uat, or prod."
}
}
variable "created_by" {
description = "Company or vendor name followed by the username part of the email address"
}
variable "rds_username" {
description = "Username for rds"
}
variable "rds_password" {
description = "Password for rds"
}
./modules/sns/main.tf
:
resource "aws_sns_topic" "foo_topic" {
name = "foo-${var.stage}-${var.topic_name}"
tags = {
Name = "foo-${var.stage}-${var.topic_name}"
Stage = var.stage
CreatedBy = var.created_by
CreatedOn = timestamp()
}
}
./modules/sns/output.tf
:
output "sns_topic_arn" {
value = aws_sns_topic.foo_topic.arn
}
调试输出
两个输出都修改了密钥、名称、帐户 ID 等:
- 来自 运行ning
terraform apply
的 plan
输出:
https://gist.github.com/ystoneman/95df711ee0a11d44e035b9f8f39b75f3
- 申请前
state
:https://gist.github.com/ystoneman/5c842769c28e1ae5969f9aaff1556b37
预期行为
整个./modules/main.tf
已经创建好了,只添加了SNS模块,所以只需要创建SNS模块即可。
实际行为
但是,RDS 资源也受到影响,并且 terraform“声称”engine_mode
已从 provisioned
更改为 global
,即使它已经是 global
根据控制台:
plan
输出还表示 cluster_identifier
只是 known after apply
,因此 forces replacement
,但是,我认为 cluster_identifier
是让aws_rds_cluster
知道它属于aws_rds_global_cluster
,对于aws_rds_cluster_instance
知道它属于aws_rds_cluster
。
重现步骤
- 注释掉
module "sns_start"
- cd
./modules
terraform apply
(这一步完成后就是我包含的状态文件所在的位置)
- 取消注释
module "sns_start"
terraform apply
(此时我提供调试输出)
重要事实
无论我 运行 从我的 Mac 还是在 AWS CodeBuild 中 运行 都会发生此问题。
参考资料
似乎 AWS Terraform tried to destory and rebuild RDS cluster 也引用了它,但它并不特定于全局集群,在全局集群中您确实需要标识符,以便实例和集群知道它们属于什么。
您似乎使用的是过时版本的 aws 提供程序并且错误地指定了 engine_mode
。有一个与此相关的错误票:https://github.com/hashicorp/terraform-provider-aws/issues/16088
它已在 3.15.0 版本中修复,您可以通过
使用
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 3.15.0"
}
}
required_version = "~> 0.13"
}
此外,您应该从您的地形规范中完全删除 engine_mode
属性。
Terraform CLI 和 Terraform AWS 提供商版本
- 安装自 https://releases.hashicorp.com/terraform/0.13.5/terraform_0.13.5_linux_amd64.zip
- hashicorp/aws v3.15.0
受影响的资源
- aws_rds_cluster
- aws_rds_cluster_实例
Terraform 配置文件
# inside ./modules/rds/main.tf
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
}
}
required_version = "~> 0.13"
}
provider "aws" {
alias = "primary"
}
provider "aws" {
alias = "dr"
}
locals {
region_tags = ["primary", "dr"]
db_name = "${var.project_name}-${var.stage}-db"
db_cluster_0 = "${local.db_name}-cluster-${local.region_tags[0]}"
db_cluster_1 = "${local.db_name}-cluster-${local.region_tags[1]}"
db_instance_name = "${local.db_name}-instance"
}
resource "aws_rds_global_cluster" "global_db" {
global_cluster_identifier = "${var.project_name}-${var.stage}"
database_name = "${var.project_name}${var.stage}db"
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
// force_destroy = true
}
resource "aws_rds_cluster" "primary_cluster" {
depends_on = [aws_rds_global_cluster.global_db]
provider = aws.primary
cluster_identifier = "${local.db_name}-cluster-${local.region_tags[0]}"
# the database name does not allow dashes:
database_name = "${var.project_name}${var.stage}db"
# The engine and engine_version must be repeated in aws_rds_global_cluster,
# aws_rds_cluster, and aws_rds_cluster_instance to
# avoid "Value for engine should match" error
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
engine_mode = "global"
global_cluster_identifier = aws_rds_global_cluster.global_db.id
# backtrack and multi-master not supported by Aurora Global.
master_username = var.username
master_password = var.password
backup_retention_period = 5
preferred_backup_window = "07:00-09:00"
db_subnet_group_name = aws_db_subnet_group.primary.id
# We must have these values, because destroying or rolling back requires them
skip_final_snapshot = true
final_snapshot_identifier = "ci-aurora-cluster-backup"
tags = {
Name = local.db_cluster_0
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_rds_cluster_instance" "primary" {
depends_on = [aws_rds_global_cluster.global_db]
provider = aws.primary
cluster_identifier = aws_rds_cluster.primary_cluster.id
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
instance_class = "db.${var.instance_class}.${var.instance_size}"
db_subnet_group_name = aws_db_subnet_group.primary.id
tags = {
Name = local.db_instance_name
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_rds_cluster" "dr_cluster" {
depends_on = [aws_rds_cluster_instance.primary, aws_rds_global_cluster.global_db]
provider = aws.dr
cluster_identifier = "${local.db_name}-cluster-${local.region_tags[1]}"
# db name now allowed to specified on secondary regions
# The engine and engine_version must be repeated in aws_rds_global_cluster,
# aws_rds_cluster, and aws_rds_cluster_instance to
# avoid "Value for engine should match" error
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
engine_mode = "global"
global_cluster_identifier = aws_rds_global_cluster.global_db.id
# backtrack and multi-master not supported by Aurora Global.
# cannot specify username/password in cross-region replication cluster:
backup_retention_period = 5
preferred_backup_window = "07:00-09:00"
db_subnet_group_name = aws_db_subnet_group.dr.id
# We must have these values, because destroying or rolling back requires them
skip_final_snapshot = true
final_snapshot_identifier = "ci-aurora-cluster-backup"
tags = {
Name = local.db_cluster_1
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_rds_cluster_instance" "dr_instance" {
depends_on = [aws_rds_cluster_instance.primary, aws_rds_global_cluster.global_db]
provider = aws.dr
cluster_identifier = aws_rds_cluster.dr_cluster.id
engine = "aurora-mysql"
engine_version = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
instance_class = "db.${var.instance_class}.${var.instance_size}"
db_subnet_group_name = aws_db_subnet_group.dr.id
tags = {
Name = local.db_instance_name
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_db_subnet_group" "primary" {
name = "${local.db_name}-subnetgroup"
subnet_ids = var.subnet_ids
provider = aws.primary
tags = {
Name = "primary_subnet_group"
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_db_subnet_group" "dr" {
provider = aws.dr
name = "${local.db_name}-subnetgroup"
subnet_ids = var.dr_subnet_ids
tags = {
Name = "dr_subnet_group"
Stage = var.stage
CreatedBy = var.created_by
}
}
resource "aws_rds_cluster_parameter_group" "default" {
name = "rds-cluster-pg"
family = "aurora-mysql${var.mysql_version}"
description = "RDS default cluster parameter group"
parameter {
name = "character_set_server"
value = "utf8"
}
parameter {
name = "character_set_client"
value = "utf8"
}
parameter {
name = "aurora_parallel_query"
value = "ON"
apply_method = "pending-reboot"
}
}
在 ./modules/sns/main.tf
中,这是我在 ./modules
目录中调用 terraform apply
时添加的资源:
resource "aws_sns_topic" "foo_topic" {
name = "foo-${var.stage}-${var.topic_name}"
tags = {
Name = "foo-${var.stage}-${var.topic_name}"
Stage = var.stage
CreatedBy = var.created_by
CreatedOn = timestamp()
}
}
./modules/main.tf
:
terraform {
backend "s3" {
bucket = "terraform-remote-state-s3-bucket-unique-name"
key = "terraform.tfstate"
region = "us-east-2"
dynamodb_table = "TerraformLockTable"
}
}
provider "aws" {
alias = "primary"
region = var.region
}
provider "aws" {
alias = "dr"
region = var.dr_region
}
module "vpc" {
stage = var.stage
source = "./vpc"
providers = {
aws = aws.primary
}
}
module "dr_vpc" {
stage = var.stage
source = "./vpc"
providers = {
aws = aws.dr
}
}
module "vpc_security_group" {
source = "./vpc_security_group"
vpc_id = module.vpc.vpc_id
providers = {
aws = aws.primary
}
}
module "rds" {
source = "./rds"
stage = var.stage
created_by = var.created_by
vpc_id = module.vpc.vpc_id
subnet_ids = [module.vpc.subnet_a_id, module.vpc.subnet_b_id, module.vpc.subnet_c_id]
dr_subnet_ids = [module.dr_vpc.subnet_a_id, module.dr_vpc.subnet_b_id, module.dr_vpc.subnet_c_id]
region = var.region
username = var.rds_username
password = var.rds_password
providers = {
aws.primary = aws.primary
aws.dr = aws.dr
}
}
module "sns_start" {
stage = var.stage
source = "./sns"
topic_name = "start"
created_by = var.created_by
}
./modules/variables.tf
:
variable "region" {
default = "us-east-2"
}
variable "dr_region" {
default = "us-west-2"
}
variable "service" {
type = string
default = "foo-back"
description = "service to match what serverless framework deploys"
}
variable "stage" {
type = string
default = "sandbox"
description = "The stage to deploy: sandbox, dev, qa, uat, or prod"
validation {
condition = can(regex("sandbox|dev|qa|uat|prod", var.stage))
error_message = "The stage value must be a valid stage: sandbox, dev, qa, uat, or prod."
}
}
variable "created_by" {
description = "Company or vendor name followed by the username part of the email address"
}
variable "rds_username" {
description = "Username for rds"
}
variable "rds_password" {
description = "Password for rds"
}
./modules/sns/main.tf
:
resource "aws_sns_topic" "foo_topic" {
name = "foo-${var.stage}-${var.topic_name}"
tags = {
Name = "foo-${var.stage}-${var.topic_name}"
Stage = var.stage
CreatedBy = var.created_by
CreatedOn = timestamp()
}
}
./modules/sns/output.tf
:
output "sns_topic_arn" {
value = aws_sns_topic.foo_topic.arn
}
调试输出
两个输出都修改了密钥、名称、帐户 ID 等:
- 来自 运行ning
terraform apply
的plan
输出: https://gist.github.com/ystoneman/95df711ee0a11d44e035b9f8f39b75f3 - 申请前
state
:https://gist.github.com/ystoneman/5c842769c28e1ae5969f9aaff1556b37
预期行为
整个./modules/main.tf
已经创建好了,只添加了SNS模块,所以只需要创建SNS模块即可。
实际行为
但是,RDS 资源也受到影响,并且 terraform“声称”engine_mode
已从 provisioned
更改为 global
,即使它已经是 global
根据控制台:
plan
输出还表示 cluster_identifier
只是 known after apply
,因此 forces replacement
,但是,我认为 cluster_identifier
是让aws_rds_cluster
知道它属于aws_rds_global_cluster
,对于aws_rds_cluster_instance
知道它属于aws_rds_cluster
。
重现步骤
- 注释掉
module "sns_start"
- cd
./modules
terraform apply
(这一步完成后就是我包含的状态文件所在的位置)- 取消注释
module "sns_start"
terraform apply
(此时我提供调试输出)
重要事实
无论我 运行 从我的 Mac 还是在 AWS CodeBuild 中 运行 都会发生此问题。
参考资料
似乎 AWS Terraform tried to destory and rebuild RDS cluster 也引用了它,但它并不特定于全局集群,在全局集群中您确实需要标识符,以便实例和集群知道它们属于什么。
您似乎使用的是过时版本的 aws 提供程序并且错误地指定了 engine_mode
。有一个与此相关的错误票:https://github.com/hashicorp/terraform-provider-aws/issues/16088
它已在 3.15.0 版本中修复,您可以通过
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 3.15.0"
}
}
required_version = "~> 0.13"
}
此外,您应该从您的地形规范中完全删除 engine_mode
属性。