Terraform database creation in Athena
I'm trying to create a database with Terraform, and it seems far more complicated than it should be for a simple query...
Can you help me?
I've already tried a null_resource with local-exec, and an "external" data source calling Python...
I think I'm looking in the wrong direction.
Example that doesn't work in Terraform 0.12:
resource "null_resource" "create-endpoint" {
provisioner "local-exec" {
query = <<EOF
{
CREATE EXTERNAL TABLE `dashboard_loading_time`(
`timestamp_iso` string,
`app_identification` struct<service:string,app_name:string,app_type:string,stage:string>,
`user` struct<api_gateway_key:struct<id:string,name:string>,mashery_key:struct<id:string,name:string>,employee:struct<id:string,name:string>>,
`action` struct<action_type:string,path:string>,
`result` struct<status:string,http_status:string,response:struct<response:string>>)
PARTITIONED BY (
`year` int)
ROW FORMAT SERDE
'org.openx.data.jsonserde.JsonSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
's3://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/dev'
}
EOF
command = "aws athena start-query-execution --query-string "query""
}
}
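For what it's worth, a local-exec provisioner only accepts a command argument (plus interpreter, working_dir, and environment), so Terraform 0.12 rejects the unknown query argument above, and the nested double quotes around "query" break the command string anyway. A minimal sketch of a version that would at least execute (the bucket names are placeholders, and Athena needs an output location via --result-configuration or a workgroup):

resource "null_resource" "create_table" {
  provisioner "local-exec" {
    # A heredoc avoids the nested-quote problem of the original command.
    command = <<EOF
aws athena start-query-execution \
  --query-string "CREATE EXTERNAL TABLE IF NOT EXISTS dashboard_loading_time (timestamp_iso string) LOCATION 's3://my-data-bucket/dev/'" \
  --result-configuration "OutputLocation=s3://my-query-results-bucket/"
EOF
  }
}

That said, running DDL through a provisioner leaves nothing in the Terraform state; the Glue-resource approach in the answers below is the more idiomatic route.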
I'd like to find the simplest way to do this with Terraform.
resource "aws_glue_catalog_table" "aws_glue_catalog_table" {
name = "mytable"
database_name = aws_glue_catalog_database.aws_glue_catalog_database.name
table_type = "EXTERNAL_TABLE"
parameters = {
"classification" = "json"
}
storage_descriptor {
location = "s3://mybucket/myprefix"
input_format = "org.apache.hadoop.mapred.TextInputFormat"
output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
ser_de_info {
name = "myserdeinfo"
serialization_library = "org.openx.data.jsonserde.JsonSerDe"
parameters = {
"paths" = "jsonrootname"
}
}
columns {
name = "column1"
type = "array<struct<resourcearn:string,tags:array<struct<key:string,value:string>>>>"
}
}
partition_keys {
name = "part1"
type = "string"
}
partition_keys {
name = "part2"
type = "string"
}
}
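Note that this references an aws_glue_catalog_database resource that isn't shown; a minimal companion definition (the database name "mydatabase" is just a placeholder) would look like:

resource "aws_glue_catalog_database" "aws_glue_catalog_database" {
  name = "mydatabase"
}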
If you want this to work with Athena, you need to create Glue resources.
Try the Terraform code below.
variable "service_name" {
default = "demo-service"
}
variable "workspace" {
default = "dev"
}
variable "columns" {
default = {
id = "int"
type = "string"
status = "int"
created_at = "timestamp"
}
}
resource "aws_glue_catalog_database" "athena" {
name = "${var.service_name}_db"
}
resource "aws_glue_catalog_table" "athena" {
name = "${var.service_name}_logs"
database_name = "${aws_glue_catalog_database.athena.name}"
table_type = "EXTERNAL_TABLE"
parameters = {
EXTERNAL = "TRUE"
}
storage_descriptor {
location = "s3://${var.service_name}-${var.workspace}-data-pipeline/log/"
input_format = "org.apache.hadoop.mapred.TextInputFormat"
output_format = "org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat"
ser_de_info {
name = "jsonserde"
serialization_library = "org.openx.data.jsonserde.JsonSerDe"
parameters = {
"serialization.format" = "1"
}
}
dynamic "columns" {
for_each = "${var.columns}"
content {
name = "${columns.key}"
type = "${columns.value}"
}
}
}
partition_keys {
name = "year"
type = "string"
}
partition_keys {
name = "month"
type = "string"
}
partition_keys {
name = "day"
type = "string"
}
partition_keys {
name = "hour"
type = "string"
}
}
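One caveat with partitioned tables: Terraform only declares the partition keys, it does not register any partitions, so the table returns no rows until partitions are loaded (for example with MSCK REPAIR TABLE in Athena, which requires a Hive-style S3 layout such as year=2020/month=01/). Alternatively, Athena's partition projection can resolve partitions at query time without any loading step; a sketch of the extra table properties this would need, merged into the parameters map above (the year range is an assumption, and a non-Hive-style layout would additionally need storage.location.template):

  parameters = {
    EXTERNAL                  = "TRUE"
    "projection.enabled"      = "true"
    "projection.year.type"    = "integer"
    "projection.year.range"   = "2019,2030"
    "projection.month.type"   = "integer"
    "projection.month.range"  = "1,12"
    "projection.month.digits" = "2"
    "projection.day.type"     = "integer"
    "projection.day.range"    = "1,31"
    "projection.day.digits"   = "2"
    "projection.hour.type"    = "integer"
    "projection.hour.range"   = "0,23"
    "projection.hour.digits"  = "2"
  }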
Refer to this repository: aws-serverless-data-pipeline-by-terraform