通过 Athena 和 CloudTrail 找到 EC2 实例的所有者

Find the owner of an EC2 instance using Athena and CloudTrail

为了知道每个EC2 instance的所有者,我通过Athena查询存储在S3中的cloudtrail logs

我在 Athena 中有一个 table,其结构如下:

-- Athena external table over the CloudTrail log files stored under the S3
-- location given at the bottom of the statement.
-- Rows are read through the CloudTrail SerDe / input format named below.
-- The table is partitioned by account, region and year.
CREATE EXTERNAL TABLE cloudtrail_logs (
    eventversion STRING,
    useridentity STRUCT<
        type:STRING,
        principalid:STRING,
        arn:STRING,
        accountid:STRING,
        invokedby:STRING,
        accesskeyid:STRING,
        userName:STRING,
        sessioncontext:STRUCT<
            attributes:STRUCT<
                mfaauthenticated:STRING,
                creationdate:STRING>,
            sessionissuer:STRUCT<
                type:STRING,
                principalId:STRING,
                arn:STRING,
                accountId:STRING,
                userName:STRING>>>,
    eventtime STRING,
    eventsource STRING,
    eventname STRING,
    awsregion STRING,
    sourceipaddress STRING,
    useragent STRING,
    errorcode STRING,
    errormessage STRING,
    requestparameters STRING,
    responseelements STRING,
    additionaleventdata STRING,
    requestid STRING,
    eventid STRING,
    resources ARRAY<STRUCT<
        ARN:STRING,
        accountId:STRING,
        type:STRING>>,
    eventtype STRING,
    apiversion STRING,
    readonly STRING,
    recipientaccountid STRING,
    serviceeventdetails STRING,
    sharedeventid STRING,
    vpcendpointid STRING
)
PARTITIONED BY (account STRING, region STRING, year STRING)
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde'
STORED AS
    INPUTFORMAT 'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://<BUCKET>/AWSLogs/';

我想找到启动 EC2 实例的用户的身份,因此我需要解析字段 responseelements,并且只获取 responseelements 中包含特定 instanceId 的行。

字段responseelements是这样的:

{
"requestId":"cab34472-31cc-44cd-ae32-a84077e55cb6",
"reservationId":"r-05964c8549788ac50",
"ownerId":"xxxxxxxxxx",
"groupSet":{},
"instancesSet":{
    "items":[
    {"instanceId":"i-043543cb4c12",
    "imageId":"ami-078df974",
    "instanceState":{"code":0,"name":"pending"},
    "privateDnsName":"ip-444444.eu-west-1.compute.internal",
    "keyName":"key-dev","amiLaunchIndex":0,"productCodes":{},
    "instanceType":"t2.large",
    "launchTime":1488438050000,
    "placement":{"availabilityZone":"eu-west-1b","tenancy":"default"},
    "monitoring":{"state":"pending"},
    "subnetId":"subnet-d8fffff",
    "vpcId":"vpc-444435",
    "privateIpAddress":"10.0.42.49",
    "stateReason":{"code":"pending","message":"pending"},
    "architecture":"x86_64",
    "rootDeviceType":"ebs",
    "rootDeviceName":"/dev/xvda",
    "blockDeviceMapping":{},
    "virtualizationType":"hvm",
    "hypervisor":"xen",
    "clientToken":"c6e53004-c561-437d-a642-196489ff297c_subnet-fffffffff",
    "groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
    "sourceDestCheck":true,
    "networkInterfaceSet":{
        "items":[
        {"networkInterfaceId":"eni-b16b66f0",
        "subnetId":"subnet-dffffff",
        "vpcId":"vpc-50fffff35",
        "ownerId":"xxxxxxxx",
        "status":"in-use",
        "macAddress":"fdsfdsfsdfqdsf",
        "privateIpAddress":"10.0.42.34234213",
        "privateDnsName":"ip-1dddddd.eu-west-1.compute.internal",
        "sourceDestCheck":true,
        "groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
        "attachment":{"attachmentId":"eni-attach-45619121","deviceIndex":0,"status":"attaching","attachTime":1488438050000,"deleteOnTermination":true},
        "privateIpAddressesSet":{"item":[{"privateIpAddress":"10ffffff","privateDnsName":"ip-ffffff.eu-west-1.compute.internal","primary":true}]},
        "ipv6AddressesSet":{},
        "tagSet":{}}]}
    ,"iamInstanceProfile":{"arn":"arn:aws:iam::xxxxx:instance-profile/infra-EC2InstanceProfile-1D59C5YR0LIYJ","id":"eeeeeeeeeeeeeeeeee"},
    "ebsOptimized":false}
    ]
    },
    "requesterId":"226008221399"
}

这是我尝试过的查询:

-- First attempt: list who issued each RunInstances call for one account.
-- The JSON path '$.instanceId' addresses a top-level key of responseelements;
-- in the sample payload instanceId is nested under instancesSet.items[],
-- so this path finds nothing and instance_id comes back empty.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract(responseelements, '$.instanceId') AS instance_id
FROM cloudtrail_logs
WHERE account = 'xxxxxxxxxxxxxxx'
    AND eventname = 'RunInstances';

但是这样查询得到的 instance_id 列是空的。如何正确地从 responseelements 中仅提取 instance_id?

我找到了查找 EC2 实例所有者的正确查询。这可能对某人有帮助!

-- Find who launched a specific EC2 instance from CloudTrail RunInstances events.
--
-- json_extract_scalar is used so instance_id is returned as a plain VARCHAR;
-- json_extract would return a JSON-typed value, i.e. the id wrapped in
-- double quotes, which is awkward to read and to compare against strings.
--
-- NOTE(review): only items[0] is read. The LIKE filter matches the id anywhere
-- in the payload, so for a RunInstances call that launched several instances
-- the row is still found, but instance_id shows the first instance of that call.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract_scalar(responseelements, '$.instancesSet.items[0].instanceId') AS instance_id
FROM cloudtrail_logs
WHERE account = 'xxxxxxx'
    AND eventname = 'RunInstances'
    AND responseelements LIKE '%i-3434ecb4c12%';

很好的回答!!!我一直在寻找这个!谢谢!只需一个小小的改动:在我的情况下,表中没有 account 这一列,所以不需要 account 条件。带上该条件会引发这个错误:

SYNTAX_ERROR: line 3:7: Column 'account' cannot be resolved

我是这样运行的:

-- Variant of the owner lookup for a CloudTrail table that has no `account`
-- column: find who launched a given EC2 instance.
--
-- Replace <myCloudLogTable> with the table name and <myinstanceId> with the
-- instance id (e.g. i-0123456789abcdef0).
--
-- The LIKE pattern is wrapped in % wildcards: responseelements holds the whole
-- JSON response, so a pattern without wildcards would require the entire
-- document to equal the instance id and would never match.
--
-- json_extract_scalar returns the id as a plain VARCHAR (json_extract would
-- return a JSON value with the id wrapped in double quotes).
-- NOTE(review): only items[0] is read; for a multi-instance launch the row is
-- matched by LIKE but instance_id shows the first instance of that call.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract_scalar(responseelements, '$.instancesSet.items[0].instanceId') AS instance_id
FROM <myCloudLogTable>
WHERE eventname = 'RunInstances'
    AND responseelements LIKE '%<myinstanceId>%';