通过 Athena 和 CloudTrail 找到 EC2 实例的所有者
find the owner of EC2 instance by Athena and CloudTrail
为了知道每个 EC2 实例的所有者,我通过 Athena 查询存储在 S3 中的 CloudTrail 日志。
我在 Athena 中有一个 table,其结构如下:
-- External table over the CloudTrail log files stored under the S3 location
-- below, read with the CloudTrail SerDe and input format named in the
-- ROW FORMAT / STORED AS clauses.
-- useridentity is a nested STRUCT and resources is an ARRAY of STRUCT; every
-- other column, including requestparameters and responseelements, is a plain
-- STRING. The table is partitioned by account, region and year.
CREATE EXTERNAL TABLE cloudtrail_logs (
    eventversion STRING,
    useridentity STRUCT<
        type:STRING,
        principalid:STRING,
        arn:STRING,
        accountid:STRING,
        invokedby:STRING,
        accesskeyid:STRING,
        userName:STRING,
        sessioncontext:STRUCT<
            attributes:STRUCT<
                mfaauthenticated:STRING,
                creationdate:STRING>,
            sessionissuer:STRUCT<
                type:STRING,
                principalId:STRING,
                arn:STRING,
                accountId:STRING,
                userName:STRING>>>,
    eventtime STRING,
    eventsource STRING,
    eventname STRING,
    awsregion STRING,
    sourceipaddress STRING,
    useragent STRING,
    errorcode STRING,
    errormessage STRING,
    requestparameters STRING,
    responseelements STRING,
    additionaleventdata STRING,
    requestid STRING,
    eventid STRING,
    resources ARRAY<STRUCT<
        ARN:STRING,
        accountId:STRING,
        type:STRING>>,
    eventtype STRING,
    apiversion STRING,
    readonly STRING,
    recipientaccountid STRING,
    serviceeventdetails STRING,
    sharedeventid STRING,
    vpcendpointid STRING
)
PARTITIONED BY (
    account STRING,
    region STRING,
    year STRING
)
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde'
STORED AS
    INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://<BUCKET>/AWSLogs/';
我想找到启动 EC2 实例的用户的身份,因此我需要解析字段 responseelements,并且只获取 responseelements 中包含特定 instanceId 的行。
字段 responseelements 的内容如下:
{
"requestId":"cab34472-31cc-44cd-ae32-a84077e55cb6",
"reservationId":"r-05964c8549788ac50",
"ownerId":"xxxxxxxxxx",
"groupSet":{},
"instancesSet":{
"items":[
{"instanceId":"i-043543cb4c12",
"imageId":"ami-078df974",
"instanceState":{"code":0,"name":"pending"},
"privateDnsName":"ip-444444.eu-west-1.compute.internal",
"keyName":"key-dev","amiLaunchIndex":0,"productCodes":{},
"instanceType":"t2.large",
"launchTime":1488438050000,
"placement":{"availabilityZone":"eu-west-1b","tenancy":"default"},
"monitoring":{"state":"pending"},
"subnetId":"subnet-d8fffff",
"vpcId":"vpc-444435",
"privateIpAddress":"10.0.42.49",
"stateReason":{"code":"pending","message":"pending"},
"architecture":"x86_64",
"rootDeviceType":"ebs",
"rootDeviceName":"/dev/xvda",
"blockDeviceMapping":{},
"virtualizationType":"hvm",
"hypervisor":"xen",
"clientToken":"c6e53004-c561-437d-a642-196489ff297c_subnet-fffffffff",
"groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
"sourceDestCheck":true,
"networkInterfaceSet":{
"items":[
{"networkInterfaceId":"eni-b16b66f0",
"subnetId":"subnet-dffffff",
"vpcId":"vpc-50fffff35",
"ownerId":"xxxxxxxx",
"status":"in-use",
"macAddress":"fdsfdsfsdfqdsf",
"privateIpAddress":"10.0.42.34234213",
"privateDnsName":"ip-1dddddd.eu-west-1.compute.internal",
"sourceDestCheck":true,
"groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
"attachment":{"attachmentId":"eni-attach-45619121","deviceIndex":0,"status":"attaching","attachTime":1488438050000,"deleteOnTermination":true},
"privateIpAddressesSet":{"item":[{"privateIpAddress":"10ffffff","privateDnsName":"ip-ffffff.eu-west-1.compute.internal","primary":true}]},
"ipv6AddressesSet":{},
"tagSet":{}}]}
,"iamInstanceProfile":{"arn":"arn:aws:iam::xxxxx:instance-profile/infra-EC2InstanceProfile-1D59C5YR0LIYJ","id":"eeeeeeeeeeeeeeeeee"},
"ebsOptimized":false}
]
},
"requesterId":"226008221399"
}
这是我尝试过的查询:
-- First attempt: instance_id comes back empty because '$.instanceId' is
-- looked up at the top level of responseelements, whereas in the sample
-- document instanceId is nested under instancesSet.items[].
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract(responseelements, '$.instanceId') AS instance_id
FROM cloudtrail_logs
WHERE account = 'xxxxxxxxxxxxxxx'
    AND eventname = 'RunInstances';
但是这样返回的 instance_id 是一个空列。
如何正确地从 responseelements 中仅提取 instance_id?
我找到了查找 EC2 实例所有者的正确查询。这可能对某人有帮助!
-- Find who launched a given EC2 instance from CloudTrail RunInstances events.
--
-- A single RunInstances call can launch several instances, all listed in
-- responseelements under instancesSet.items[]. Reading only items[0] would
-- report the first instance of the call even when the one being searched for
-- sits at another index, so the array is unnested and each item is compared
-- exactly against the wanted instance ID.
-- json_extract_scalar returns the ID as plain VARCHAR (json_extract would
-- return a JSON-typed, quoted value).
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract_scalar(item, '$.instanceId') AS instance_id
FROM cloudtrail_logs
CROSS JOIN UNNEST(
    CAST(json_extract(responseelements, '$.instancesSet.items') AS ARRAY(JSON))
) AS t (item)
WHERE account = 'xxxxxxx'
    AND eventname = 'RunInstances'
    -- Substring match on the raw JSON text, kept from the original query as a pre-filter.
    AND responseelements LIKE '%i-3434ecb4c12%'
    -- Exact match on the unnested item so only the requested instance is returned.
    AND json_extract_scalar(item, '$.instanceId') = 'i-3434ecb4c12'
;
很好的回答!!!我一直在寻找这个!谢谢!只需要一个小小的改动:在我的情况下,表中没有 account 列,因此不需要 account 条件。带上该条件会引发这个错误:
SYNTAX_ERROR: line 3:7: Column 'account' cannot be resolved
我是这样运行的:
-- Variant of the owner lookup for a table that has no `account` column.
--
-- LIKE without wildcards only matches when the whole string equals the
-- pattern, which can never be true for the full responseelements JSON text,
-- so the instance ID is wrapped in % to match it anywhere in the document.
-- NOTE: items[0] yields only the first instance of a RunInstances call; when
-- several instances were launched at once, the value shown may differ from
-- the instance ID that was searched for.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract(responseelements, '$.instancesSet.items[0].instanceId') AS instance_id
FROM <myCloudLogTable>
WHERE eventname = 'RunInstances'
    AND responseelements LIKE '%<myinstanceId>%';
为了知道每个 EC2 实例的所有者,我通过 Athena 查询存储在 S3 中的 CloudTrail 日志。
我在 Athena 中有一个 table,其结构如下:
-- External table over the CloudTrail log files stored under the S3 location
-- below, read with the CloudTrail SerDe and input format named in the
-- ROW FORMAT / STORED AS clauses.
-- useridentity is a nested STRUCT and resources is an ARRAY of STRUCT; every
-- other column, including requestparameters and responseelements, is a plain
-- STRING. The table is partitioned by account, region and year.
CREATE EXTERNAL TABLE cloudtrail_logs (
    eventversion STRING,
    useridentity STRUCT<
        type:STRING,
        principalid:STRING,
        arn:STRING,
        accountid:STRING,
        invokedby:STRING,
        accesskeyid:STRING,
        userName:STRING,
        sessioncontext:STRUCT<
            attributes:STRUCT<
                mfaauthenticated:STRING,
                creationdate:STRING>,
            sessionissuer:STRUCT<
                type:STRING,
                principalId:STRING,
                arn:STRING,
                accountId:STRING,
                userName:STRING>>>,
    eventtime STRING,
    eventsource STRING,
    eventname STRING,
    awsregion STRING,
    sourceipaddress STRING,
    useragent STRING,
    errorcode STRING,
    errormessage STRING,
    requestparameters STRING,
    responseelements STRING,
    additionaleventdata STRING,
    requestid STRING,
    eventid STRING,
    resources ARRAY<STRUCT<
        ARN:STRING,
        accountId:STRING,
        type:STRING>>,
    eventtype STRING,
    apiversion STRING,
    readonly STRING,
    recipientaccountid STRING,
    serviceeventdetails STRING,
    sharedeventid STRING,
    vpcendpointid STRING
)
PARTITIONED BY (
    account STRING,
    region STRING,
    year STRING
)
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde'
STORED AS
    INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://<BUCKET>/AWSLogs/';
我想找到启动 EC2 实例的用户的身份,因此我需要解析字段 responseelements,并且只获取 responseelements 中包含特定 instanceId 的行。
字段 responseelements 的内容如下:
{
"requestId":"cab34472-31cc-44cd-ae32-a84077e55cb6",
"reservationId":"r-05964c8549788ac50",
"ownerId":"xxxxxxxxxx",
"groupSet":{},
"instancesSet":{
"items":[
{"instanceId":"i-043543cb4c12",
"imageId":"ami-078df974",
"instanceState":{"code":0,"name":"pending"},
"privateDnsName":"ip-444444.eu-west-1.compute.internal",
"keyName":"key-dev","amiLaunchIndex":0,"productCodes":{},
"instanceType":"t2.large",
"launchTime":1488438050000,
"placement":{"availabilityZone":"eu-west-1b","tenancy":"default"},
"monitoring":{"state":"pending"},
"subnetId":"subnet-d8fffff",
"vpcId":"vpc-444435",
"privateIpAddress":"10.0.42.49",
"stateReason":{"code":"pending","message":"pending"},
"architecture":"x86_64",
"rootDeviceType":"ebs",
"rootDeviceName":"/dev/xvda",
"blockDeviceMapping":{},
"virtualizationType":"hvm",
"hypervisor":"xen",
"clientToken":"c6e53004-c561-437d-a642-196489ff297c_subnet-fffffffff",
"groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
"sourceDestCheck":true,
"networkInterfaceSet":{
"items":[
{"networkInterfaceId":"eni-b16b66f0",
"subnetId":"subnet-dffffff",
"vpcId":"vpc-50fffff35",
"ownerId":"xxxxxxxx",
"status":"in-use",
"macAddress":"fdsfdsfsdfqdsf",
"privateIpAddress":"10.0.42.34234213",
"privateDnsName":"ip-1dddddd.eu-west-1.compute.internal",
"sourceDestCheck":true,
"groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
"attachment":{"attachmentId":"eni-attach-45619121","deviceIndex":0,"status":"attaching","attachTime":1488438050000,"deleteOnTermination":true},
"privateIpAddressesSet":{"item":[{"privateIpAddress":"10ffffff","privateDnsName":"ip-ffffff.eu-west-1.compute.internal","primary":true}]},
"ipv6AddressesSet":{},
"tagSet":{}}]}
,"iamInstanceProfile":{"arn":"arn:aws:iam::xxxxx:instance-profile/infra-EC2InstanceProfile-1D59C5YR0LIYJ","id":"eeeeeeeeeeeeeeeeee"},
"ebsOptimized":false}
]
},
"requesterId":"226008221399"
}
这是我尝试过的查询:
-- First attempt: instance_id comes back empty because '$.instanceId' is
-- looked up at the top level of responseelements, whereas in the sample
-- document instanceId is nested under instancesSet.items[].
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract(responseelements, '$.instanceId') AS instance_id
FROM cloudtrail_logs
WHERE account = 'xxxxxxxxxxxxxxx'
    AND eventname = 'RunInstances';
但是这样返回的 instance_id 是一个空列。
如何正确地从 responseelements 中仅提取 instance_id?
我找到了查找 EC2 实例所有者的正确查询。这可能对某人有帮助!
-- Find who launched a given EC2 instance from CloudTrail RunInstances events.
--
-- A single RunInstances call can launch several instances, all listed in
-- responseelements under instancesSet.items[]. Reading only items[0] would
-- report the first instance of the call even when the one being searched for
-- sits at another index, so the array is unnested and each item is compared
-- exactly against the wanted instance ID.
-- json_extract_scalar returns the ID as plain VARCHAR (json_extract would
-- return a JSON-typed, quoted value).
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract_scalar(item, '$.instanceId') AS instance_id
FROM cloudtrail_logs
CROSS JOIN UNNEST(
    CAST(json_extract(responseelements, '$.instancesSet.items') AS ARRAY(JSON))
) AS t (item)
WHERE account = 'xxxxxxx'
    AND eventname = 'RunInstances'
    -- Substring match on the raw JSON text, kept from the original query as a pre-filter.
    AND responseelements LIKE '%i-3434ecb4c12%'
    -- Exact match on the unnested item so only the requested instance is returned.
    AND json_extract_scalar(item, '$.instanceId') = 'i-3434ecb4c12'
;
很好的回答!!!我一直在寻找这个!谢谢!只需要一个小小的改动:在我的情况下,表中没有 account 列,因此不需要 account 条件。带上该条件会引发这个错误:
SYNTAX_ERROR: line 3:7: Column 'account' cannot be resolved
我是这样运行的:
-- Variant of the owner lookup for a table that has no `account` column.
--
-- LIKE without wildcards only matches when the whole string equals the
-- pattern, which can never be true for the full responseelements JSON text,
-- so the instance ID is wrapped in % to match it anywhere in the document.
-- NOTE: items[0] yields only the first instance of a RunInstances call; when
-- several instances were launched at once, the value shown may differ from
-- the instance ID that was searched for.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract(responseelements, '$.instancesSet.items[0].instanceId') AS instance_id
FROM <myCloudLogTable>
WHERE eventname = 'RunInstances'
    AND responseelements LIKE '%<myinstanceId>%';