Why is Terraform forcing replacement of an Aurora global database?

Terraform CLI and Terraform AWS Provider Version

Affected Resources

Terraform Configuration Files

    # inside ./modules/rds/main.tf

    terraform {
      required_providers {
        aws = {
          source = "hashicorp/aws"
        }
      }
      required_version = "~> 0.13"
    }
    
    provider "aws" {
      alias = "primary"
    }
    
    provider "aws" {
      alias = "dr"
    }
    
    locals {
      region_tags      = ["primary", "dr"]
      db_name          = "${var.project_name}-${var.stage}-db"
      db_cluster_0     = "${local.db_name}-cluster-${local.region_tags[0]}"
      db_cluster_1     = "${local.db_name}-cluster-${local.region_tags[1]}"
      db_instance_name = "${local.db_name}-instance"
    }
    
    resource "aws_rds_global_cluster" "global_db" {
      global_cluster_identifier = "${var.project_name}-${var.stage}"
      database_name             = "${var.project_name}${var.stage}db"
      engine                    = "aurora-mysql"
      engine_version            = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
      // force_destroy             = true
    }
    
    resource "aws_rds_cluster" "primary_cluster" {
      depends_on         = [aws_rds_global_cluster.global_db]
      provider           = aws.primary
      cluster_identifier = "${local.db_name}-cluster-${local.region_tags[0]}"
    
      # the database name does not allow dashes:
      database_name = "${var.project_name}${var.stage}db"
    
      # The engine and engine_version must be repeated in aws_rds_global_cluster,
      # aws_rds_cluster, and aws_rds_cluster_instance to 
      # avoid "Value for engine should match" error
      engine                    = "aurora-mysql"
      engine_version            = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
      engine_mode               = "global"
      global_cluster_identifier = aws_rds_global_cluster.global_db.id
    
      # backtrack and multi-master not supported by Aurora Global.
    
      master_username         = var.username
      master_password         = var.password
      backup_retention_period = 5
      preferred_backup_window = "07:00-09:00"
      db_subnet_group_name    = aws_db_subnet_group.primary.id
    
      # We must have these values, because destroying or rolling back requires them
      skip_final_snapshot       = true
      final_snapshot_identifier = "ci-aurora-cluster-backup"
    
      tags = {
        Name      = local.db_cluster_0
        Stage     = var.stage
        CreatedBy = var.created_by
      }
    }
    
    resource "aws_rds_cluster_instance" "primary" {
      depends_on           = [aws_rds_global_cluster.global_db]
      provider             = aws.primary
      cluster_identifier   = aws_rds_cluster.primary_cluster.id
      engine               = "aurora-mysql"
      engine_version       = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
      instance_class       = "db.${var.instance_class}.${var.instance_size}"
      db_subnet_group_name = aws_db_subnet_group.primary.id
    
      tags = {
        Name      = local.db_instance_name
        Stage     = var.stage
        CreatedBy = var.created_by
      }
    }
    
    resource "aws_rds_cluster" "dr_cluster" {
      depends_on         = [aws_rds_cluster_instance.primary, aws_rds_global_cluster.global_db]
      provider           = aws.dr
      cluster_identifier = "${local.db_name}-cluster-${local.region_tags[1]}"
    
      # the database name is not allowed to be specified on secondary clusters
    
      # The engine and engine_version must be repeated in aws_rds_global_cluster,
      # aws_rds_cluster, and aws_rds_cluster_instance to 
      # avoid "Value for engine should match" error
      engine                    = "aurora-mysql"
      engine_version            = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
      engine_mode               = "global"
      global_cluster_identifier = aws_rds_global_cluster.global_db.id
    
      # backtrack and multi-master not supported by Aurora Global.
    
      # cannot specify username/password in cross-region replication cluster:
      backup_retention_period = 5
      preferred_backup_window = "07:00-09:00"
      db_subnet_group_name    = aws_db_subnet_group.dr.id
    
      # We must have these values, because destroying or rolling back requires them
      skip_final_snapshot       = true
      final_snapshot_identifier = "ci-aurora-cluster-backup"
    
      tags = {
        Name      = local.db_cluster_1
        Stage     = var.stage
        CreatedBy = var.created_by
      }
    }
    
    resource "aws_rds_cluster_instance" "dr_instance" {
      depends_on           = [aws_rds_cluster_instance.primary, aws_rds_global_cluster.global_db]
      provider             = aws.dr
      cluster_identifier   = aws_rds_cluster.dr_cluster.id
      engine               = "aurora-mysql"
      engine_version       = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
      instance_class       = "db.${var.instance_class}.${var.instance_size}"
      db_subnet_group_name = aws_db_subnet_group.dr.id
    
      tags = {
        Name      = local.db_instance_name
        Stage     = var.stage
        CreatedBy = var.created_by
      }
    }
    
    resource "aws_db_subnet_group" "primary" {
      name       = "${local.db_name}-subnetgroup"
      subnet_ids = var.subnet_ids
      provider   = aws.primary
    
      tags = {
        Name      = "primary_subnet_group"
        Stage     = var.stage
        CreatedBy = var.created_by
      }
    }
    
    resource "aws_db_subnet_group" "dr" {
      provider   = aws.dr
      name       = "${local.db_name}-subnetgroup"
      subnet_ids = var.dr_subnet_ids
    
      tags = {
        Name      = "dr_subnet_group"
        Stage     = var.stage
        CreatedBy = var.created_by
      }
    }
    
    resource "aws_rds_cluster_parameter_group" "default" {
      name        = "rds-cluster-pg"
      family      = "aurora-mysql${var.mysql_version}"
      description = "RDS default cluster parameter group"
      parameter {
        name  = "character_set_server"
        value = "utf8"
      }
      parameter {
        name  = "character_set_client"
        value = "utf8"
      }
      parameter {
        name         = "aurora_parallel_query"
        value        = "ON"
        apply_method = "pending-reboot"
      }
    }
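
For reference, the RDS module's own variables.tf is not included above. A minimal sketch of the variables it references could look like the following (the defaults shown here are illustrative, not the real values):

    # ./modules/rds/variables.tf -- sketch only; defaults are illustrative
    variable "project_name" {
      type    = string
      default = "foo" # not passed in from ./modules/main.tf, so it needs a default
    }

    variable "stage" { type = string }
    variable "created_by" { type = string }
    variable "region" { type = string }
    variable "username" { type = string }
    variable "password" { type = string }
    variable "vpc_id" { type = string }
    variable "subnet_ids" { type = list(string) }
    variable "dr_subnet_ids" { type = list(string) }

    # Combined into "<mysql_version>.mysql_aurora.<aurora_version>" for engine_version,
    # e.g. "5.7.mysql_aurora.2.07.2".
    variable "mysql_version" {
      type    = string
      default = "5.7"
    }

    variable "aurora_version" {
      type    = string
      default = "2.07.2"
    }

    # Combined into "db.<instance_class>.<instance_size>" for instance_class,
    # e.g. "db.r5.large".
    variable "instance_class" {
      type    = string
      default = "r5"
    }

    variable "instance_size" {
      type    = string
      default = "large"
    }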

In ./modules/sns/main.tf, this is the resource I added when I ran terraform apply from the ./modules directory:

    resource "aws_sns_topic" "foo_topic" {
      name = "foo-${var.stage}-${var.topic_name}"
      tags = {
        Name      = "foo-${var.stage}-${var.topic_name}"
        Stage     = var.stage
        CreatedBy = var.created_by
        CreatedOn = timestamp()
      }
    }

./modules/main.tf:

    terraform {
      backend "s3" {
        bucket = "terraform-remote-state-s3-bucket-unique-name"
        key    = "terraform.tfstate"
        region = "us-east-2"
        dynamodb_table = "TerraformLockTable"
      }
    }

    provider "aws" {
      alias  = "primary"
      region = var.region
    }

    provider "aws" {
      alias  = "dr"
      region = var.dr_region
    }


    module "vpc" {
      stage  = var.stage
      source = "./vpc"
      providers = {
        aws = aws.primary
      }
    }
    module "dr_vpc" {
      stage  = var.stage
      source = "./vpc"
      providers = {
        aws = aws.dr
      }
    }

    module "vpc_security_group" {
      source = "./vpc_security_group"
      vpc_id = module.vpc.vpc_id
      providers = {
        aws = aws.primary
      }
    }


    module "rds" {
      source        = "./rds"
      stage         = var.stage
      created_by    = var.created_by
      vpc_id        = module.vpc.vpc_id
      subnet_ids    = [module.vpc.subnet_a_id, module.vpc.subnet_b_id, module.vpc.subnet_c_id]
      dr_subnet_ids = [module.dr_vpc.subnet_a_id, module.dr_vpc.subnet_b_id, module.dr_vpc.subnet_c_id]
      region        = var.region
      username      = var.rds_username
      password      = var.rds_password

      providers = {
        aws.primary = aws.primary
        aws.dr      = aws.dr
      }
    }

    module "sns_start" {
      stage      = var.stage
      source     = "./sns"
      topic_name = "start"
      created_by = var.created_by
    }

./modules/variables.tf:

variable "region" {
  default = "us-east-2"
}

variable "dr_region" {
  default = "us-west-2"
}
variable "service" {
  type        = string
  default     = "foo-back"
  description = "service to match what serverless framework deploys"
}

variable "stage" {
  type        = string
  default     = "sandbox"
  description = "The stage to deploy: sandbox, dev, qa, uat, or prod"

  validation {
    condition     = can(regex("sandbox|dev|qa|uat|prod", var.stage))
    error_message = "The stage value must be a valid stage: sandbox, dev, qa, uat, or prod."
  }
}

variable "created_by" {
  description = "Company or vendor name followed by the username part of the email address"
}

variable "rds_username" {
  description = "Username for rds"
}

variable "rds_password" {
  description = "Password for rds"
}

./modules/sns/output.tf:

output "sns_topic_arn" {
  value = aws_sns_topic.foo_topic.arn
}
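
Similarly, the SNS module's variables.tf is not shown; based on the references above it only needs something along these lines:

    # ./modules/sns/variables.tf -- sketch inferred from the references above
    variable "stage" { type = string }
    variable "topic_name" { type = string }
    variable "created_by" { type = string }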

Debug Output

Both outputs have had keys, names, account IDs, etc. redacted:

Expected Behavior

Everything in ./modules/main.tf had already been created, and only the SNS module was added, so only the SNS module's resources should need to be created.

Actual Behavior

However, the RDS resources are affected as well, and Terraform "claims" that engine_mode has changed from provisioned to global, even though it is already global according to the console:

The plan output also says that cluster_identifier is only known after apply and therefore forces replacement. However, I understood cluster_identifier to be how the aws_rds_cluster knows it belongs to the aws_rds_global_cluster, and how the aws_rds_cluster_instance knows it belongs to the aws_rds_cluster.
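
To double-check what Terraform itself has recorded for the cluster (rather than what the console shows), the state entry can be inspected; with the module layout above, the address is:

    terraform state show 'module.rds.aws_rds_cluster.primary_cluster'

If the engine_mode recorded there does not match the configuration, that mismatch is where the forced replacement in the plan comes from.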

Steps to Reproduce

  1. Comment out module "sns_start"
  2. cd ./modules
  3. terraform apply (the state file I included is from the point right after this step completes)
  4. Uncomment module "sns_start"
  5. terraform apply (this is the run the debug output was captured from)

Important Factoids

This problem occurs whether I run it from my Mac or in AWS CodeBuild.

References

"AWS Terraform tried to destory and rebuild RDS cluster" seems to reference this as well, but that question is not specific to global clusters, where you really do need the identifiers so the instances and clusters know what they belong to.

It looks like you are using an outdated version of the AWS provider and are specifying engine_mode incorrectly. There is a bug ticket about this: https://github.com/hashicorp/terraform-provider-aws/issues/16088

It was fixed in version 3.15.0, which you can use with:

    terraform {
      required_providers {
        aws = {
          source  = "hashicorp/aws"
          version = "~> 3.15.0"
        }
      }
      required_version = "~> 0.13"
    }
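
After updating the version constraint, run terraform init -upgrade (from the directory you apply in, ./modules here) so Terraform actually downloads the newer provider before the next plan:

    terraform init -upgrade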

Additionally, you should remove the engine_mode attribute from your Terraform configuration entirely.
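
With the provider pinned to 3.15.0 or later, the primary cluster from the question would then look roughly like this (a sketch of the same resource with engine_mode simply dropped and everything else unchanged; the same applies to dr_cluster):

    resource "aws_rds_cluster" "primary_cluster" {
      depends_on         = [aws_rds_global_cluster.global_db]
      provider           = aws.primary
      cluster_identifier = "${local.db_name}-cluster-${local.region_tags[0]}"
      database_name      = "${var.project_name}${var.stage}db"

      # engine_mode is intentionally omitted: membership in the global database
      # is expressed through global_cluster_identifier, and changing engine_mode
      # forces replacement.
      engine                    = "aurora-mysql"
      engine_version            = "${var.mysql_version}.mysql_aurora.${var.aurora_version}"
      global_cluster_identifier = aws_rds_global_cluster.global_db.id

      master_username         = var.username
      master_password         = var.password
      backup_retention_period = 5
      preferred_backup_window = "07:00-09:00"
      db_subnet_group_name    = aws_db_subnet_group.primary.id

      skip_final_snapshot       = true
      final_snapshot_identifier = "ci-aurora-cluster-backup"

      tags = {
        Name      = local.db_cluster_0
        Stage     = var.stage
        CreatedBy = var.created_by
      }
    }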