AWS cdk python,具有每日触发器的胶水爬虫的哪个 IAM 角色?
AWS cdk python, which IAM role for a glue crawler with a daily trigger?
我正在尝试为 s3 部署 glue crawler
。不幸的是,我无法设法找到允许爬虫 运行 的适当 IAM 角色。我需要的权限只是对 S3 的 read/write 和 logs:PutLogsEvent,但不知何故我没有得到正确的权限。
这是我的代码,它可以部署但是 crawler
没有 运行.
的权限
from aws_cdk import (
aws_events as events,
aws_lambda as lambda_,
aws_events_targets as targets,
aws_iam as iam,
aws_glue as glue,
core
)
class MyStack(core.Stack):
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
super().__init__(scope, id, **kwargs)
# what should I put in the role exactly?
glue_role = iam.Role(
self, 'Role__arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole',
assumed_by=iam.ServicePrincipal('glue.amazonaws.com'),
)
glue_trigger = glue.CfnTrigger(self, "glue-daily-trigger",
name = "etl-trigger",
schedule = "cron(5 * * * ? *)", # every hour at X.05, every day
type="SCHEDULED",
actions=[
{
"jobName": "glue_crawler-daily"
}
],
start_on_creation=True
)
crawler_name = 'crawler_units_data'
glue_crawler = glue.CfnCrawler(
self, crawler_name,
name=crawler_name,
database_name='data_science',
role=glue_role.role_arn,#'arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole',
targets={"s3Targets": [{"path": "s3://random_s3/units/"}]},
)
glue_trigger.add_depends_on(glue_crawler)
我尝试了几件事并翻译了 javascript 示例中的代码 like this one 但是从 javascript 调用的方法没有 100% 映射到 python.
这个角色(从 GUI 创建)工作正常并且有 2 个策略。
- 从 s3 读取和写入的策略
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject"
],
"Resource": [
"arn:aws:s3:::random_s3/units*"
]
}
]
}
- AWSGlueServicePolicy
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"glue:*",
"s3:GetBucketLocation",
"s3:ListBucket",
"s3:ListAllMyBuckets",
"s3:GetBucketAcl",
"ec2:DescribeVpcEndpoints",
"ec2:DescribeRouteTables",
"ec2:CreateNetworkInterface",
"ec2:DeleteNetworkInterface",
"ec2:DescribeNetworkInterfaces",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSubnets",
"ec2:DescribeVpcAttribute",
"iam:ListRolePolicies",
"iam:GetRole",
"iam:GetRolePolicy",
"cloudwatch:PutMetricData"
],
"Resource": [
"*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:CreateBucket"
],
"Resource": [
"arn:aws:s3:::aws-glue-*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::aws-glue-*/*",
"arn:aws:s3:::*/*aws-glue-*/*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject"
],
"Resource": [
"arn:aws:s3:::crawler-public*",
"arn:aws:s3:::aws-glue-*"
]
},
{
"Effect": "Allow",
"Action": [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": [
"arn:aws:logs:*:*:/aws-glue/*"
]
},
{
"Effect": "Allow",
"Action": [
"ec2:CreateTags",
"ec2:DeleteTags"
],
"Condition": {
"ForAllValues:StringEquals": {
"aws:TagKeys": [
"aws-glue-service-resource"
]
}
},
"Resource": [
"arn:aws:ec2:*:*:network-interface/*",
"arn:aws:ec2:*:*:security-group/*",
"arn:aws:ec2:*:*:instance/*"
]
}
]
}
事实证明,我需要以不同的方式传递名称和策略
glue_role = iam.Role(
self, 'glue_role_id2323',
role_name = 'Rolename',
assumed_by=iam.ServicePrincipal('glue.amazonaws.com'),
managed_policies=[iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSGlueServiceRole')]
)
我正在尝试为 s3 部署 glue crawler
。不幸的是,我无法设法找到允许爬虫 运行 的适当 IAM 角色。我需要的权限只是对 S3 的 read/write 和 logs:PutLogsEvent,但不知何故我没有得到正确的权限。
这是我的代码,它可以部署但是 crawler
没有 运行.
from aws_cdk import (
aws_events as events,
aws_lambda as lambda_,
aws_events_targets as targets,
aws_iam as iam,
aws_glue as glue,
core
)
class MyStack(core.Stack):
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
super().__init__(scope, id, **kwargs)
# what should I put in the role exactly?
glue_role = iam.Role(
self, 'Role__arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole',
assumed_by=iam.ServicePrincipal('glue.amazonaws.com'),
)
glue_trigger = glue.CfnTrigger(self, "glue-daily-trigger",
name = "etl-trigger",
schedule = "cron(5 * * * ? *)", # every hour at X.05, every day
type="SCHEDULED",
actions=[
{
"jobName": "glue_crawler-daily"
}
],
start_on_creation=True
)
crawler_name = 'crawler_units_data'
glue_crawler = glue.CfnCrawler(
self, crawler_name,
name=crawler_name,
database_name='data_science',
role=glue_role.role_arn,#'arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole',
targets={"s3Targets": [{"path": "s3://random_s3/units/"}]},
)
glue_trigger.add_depends_on(glue_crawler)
我尝试了几件事并翻译了 javascript 示例中的代码 like this one 但是从 javascript 调用的方法没有 100% 映射到 python.
这个角色(从 GUI 创建)工作正常并且有 2 个策略。
- 从 s3 读取和写入的策略
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject"
],
"Resource": [
"arn:aws:s3:::random_s3/units*"
]
}
]
}
- AWSGlueServicePolicy
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"glue:*",
"s3:GetBucketLocation",
"s3:ListBucket",
"s3:ListAllMyBuckets",
"s3:GetBucketAcl",
"ec2:DescribeVpcEndpoints",
"ec2:DescribeRouteTables",
"ec2:CreateNetworkInterface",
"ec2:DeleteNetworkInterface",
"ec2:DescribeNetworkInterfaces",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSubnets",
"ec2:DescribeVpcAttribute",
"iam:ListRolePolicies",
"iam:GetRole",
"iam:GetRolePolicy",
"cloudwatch:PutMetricData"
],
"Resource": [
"*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:CreateBucket"
],
"Resource": [
"arn:aws:s3:::aws-glue-*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::aws-glue-*/*",
"arn:aws:s3:::*/*aws-glue-*/*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject"
],
"Resource": [
"arn:aws:s3:::crawler-public*",
"arn:aws:s3:::aws-glue-*"
]
},
{
"Effect": "Allow",
"Action": [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": [
"arn:aws:logs:*:*:/aws-glue/*"
]
},
{
"Effect": "Allow",
"Action": [
"ec2:CreateTags",
"ec2:DeleteTags"
],
"Condition": {
"ForAllValues:StringEquals": {
"aws:TagKeys": [
"aws-glue-service-resource"
]
}
},
"Resource": [
"arn:aws:ec2:*:*:network-interface/*",
"arn:aws:ec2:*:*:security-group/*",
"arn:aws:ec2:*:*:instance/*"
]
}
]
}
事实证明,我需要以不同的方式传递名称和策略
glue_role = iam.Role(
self, 'glue_role_id2323',
role_name = 'Rolename',
assumed_by=iam.ServicePrincipal('glue.amazonaws.com'),
managed_policies=[iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSGlueServiceRole')]
)