使用 Terraform 在 Athena 中创建数据库

terraform database creation in athena

我正在尝试使用 Terraform 在 Athena 中创建数据库,但仅仅为了执行区区一条查询,这似乎非常复杂……

你能帮帮我吗?

我已经尝试过在 null_resource 中使用 local-exec,也尝试过调用 Python 脚本的 external 数据源……我想我可能找错了方向。

示例(在 Terraform 0.12 中无法工作):

# Attempted approach: run the Athena CREATE EXTERNAL TABLE statement through
# the AWS CLI from a local-exec provisioner attached to a null_resource.
# This snippet is the non-working example; the review notes below mark the
# parts that look wrong.
resource "null_resource" "create-endpoint" {
  provisioner "local-exec" {
  # NOTE(review): `query` does not appear to be an argument of the local-exec
  # provisioner (it documents `command`, `working_dir`, `interpreter`,
  # `environment`) -- confirm against the provisioner documentation.
  # The `{` and `}` lines inside the heredoc are part of the string itself,
  # so they would be sent to Athena together with the DDL.
  query = <<EOF
{
CREATE EXTERNAL TABLE `dashboard_loading_time`(
  `timestamp_iso` string, 
  `app_identification` struct<service:string,app_name:string,app_type:string,stage:string>, 
  `user` struct<api_gateway_key:struct<id:string,name:string>,mashery_key:struct<id:string,name:string>,employee:struct<id:string,name:string>>, 
  `action` struct<action_type:string,path:string>, 
  `result` struct<status:string,http_status:string,response:struct<response:string>>)
PARTITIONED BY ( 
  `year` int)
ROW FORMAT SERDE 
  'org.openx.data.jsonserde.JsonSerDe' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  's3://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/dev'
}
EOF
    # NOTE(review): the double quotes around `query` are not escaped, so the
    # string literal ends right after `--query-string `. The word `query` is
    # plain text here, not a reference to the heredoc above.
    command = "aws athena start-query-execution --query-string "query""
  }
}

我想找到使用 terraform 执行此操作的最简单方法。

# Declares an external table in the Glue Data Catalog backed by JSON objects
# under an S3 prefix, with two string partition keys.
resource "aws_glue_catalog_table" "aws_glue_catalog_table" {
  database_name = aws_glue_catalog_database.aws_glue_catalog_database.name
  name          = "mytable"
  table_type    = "EXTERNAL_TABLE"

  parameters = {
    "classification" = "json"
  }

  # Physical layout of the data: where it lives and how rows are (de)serialized.
  storage_descriptor {
    input_format  = "org.apache.hadoop.mapred.TextInputFormat"
    output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
    location      = "s3://mybucket/myprefix"

    ser_de_info {
      serialization_library = "org.openx.data.jsonserde.JsonSerDe"
      name                  = "myserdeinfo"

      parameters = {
        "paths" = "jsonrootname"
      }
    }

    # Single column holding a nested array-of-struct value.
    columns {
      type = "array<struct<resourcearn:string,tags:array<struct<key:string,value:string>>>>"
      name = "column1"
    }
  }

  # Partition keys are kept in their original order: part1, then part2.
  partition_keys {
    type = "string"
    name = "part1"
  }

  partition_keys {
    type = "string"
    name = "part2"
  }
}

如果想在 Athena 中使用这张表,需要创建 AWS Glue 资源。

使用 terraform 尝试下面的代码。

# Base name of the service. It is interpolated into the Glue database name,
# the Glue table name and the S3 bucket name of the data location.
variable "service_name" {
  description = "Service name used as the prefix for the Glue database, the Glue table and the S3 bucket."
  type        = string
  default     = "demo-service"
}

# Environment / workspace label. It is interpolated into the S3 bucket name
# of the table's data location.
variable "workspace" {
  description = "Workspace (environment) name that forms part of the S3 bucket name."
  type        = string
  default     = "dev"
}

# Table schema as a map of column name => column type. Each entry becomes one
# `columns` block of the Glue table via a dynamic block.
# NOTE(review): Terraform iterates map keys in sorted order, so the resulting
# column order follows the key names rather than the order written below --
# confirm this is acceptable for the chosen SerDe.
variable "columns" {
  description = "Map of column name to column type for the Glue table."
  type        = map(string)

  default = {
    id         = "int"
    type       = "string"
    status     = "int"
    created_at = "timestamp"
  }
}

# Glue Data Catalog database for the service; its name is the service name
# followed by the "_db" suffix.
resource "aws_glue_catalog_database" "athena" {
  name = format("%s_db", var.service_name)
}

# External table in the Glue Data Catalog over JSON log objects stored in S3.
# Columns come from var.columns; the table is partitioned by
# year / month / day / hour (all strings).
resource "aws_glue_catalog_table" "athena" {
  name          = "${var.service_name}_logs"
  database_name = aws_glue_catalog_database.athena.name
  table_type    = "EXTERNAL_TABLE"

  parameters = {
    EXTERNAL = "TRUE"
  }

  storage_descriptor {
    location     = "s3://${var.service_name}-${var.workspace}-data-pipeline/log/"
    input_format = "org.apache.hadoop.mapred.TextInputFormat"
    # Output format class that pairs with TextInputFormat in Hive/Athena DDL
    # for text-based tables.
    # NOTE(review): the un-prefixed IgnoreKeyTextOutputFormat used previously
    # appears to be the older, deprecated class name -- confirm.
    output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"

    ser_de_info {
      name                  = "jsonserde"
      serialization_library = "org.openx.data.jsonserde.JsonSerDe"

      parameters = {
        "serialization.format" = "1"
      }
    }

    # Emit one `columns` block per entry of var.columns:
    # map key -> column name, map value -> column type.
    dynamic "columns" {
      for_each = var.columns

      content {
        name = columns.key
        type = columns.value
      }
    }
  }

  # Partition keys, from coarsest to finest.
  partition_keys {
    name = "year"
    type = "string"
  }

  partition_keys {
    name = "month"
    type = "string"
  }

  partition_keys {
    name = "day"
    type = "string"
  }

  partition_keys {
    name = "hour"
    type = "string"
  }
}

Refer to this repository: aws-serverless-data-pipeline-by-terraform