来自lambda的aws-sdk js runJobFlow不发送启动EMR集群的请求
aws-sdk js runJobFlow from lambda not sending request to launch EMR cluster
我正在尝试从 lambda 启动 EMR 集群,该 lambda 由 S3 存储桶中的事件触发。
触发器工作正常,然后我使用 runJobFlow 创建了一个 AWSRequest,但是 EMR 控制台中没有任何事件,集群也没有启动。
包括创建的 AWSRequest 在内的所有内容都已记录到 CloudWatch 日志中,但没有记录任何错误。
它什么都不做
代码如下:
const aws = require('aws-sdk');
// Module-level EMR service client, pinned to the 2009-03-31 API version
// and the us-east-1 region.
const emr = new aws.EMR({ region: 'us-east-1', apiVersion: '2009-03-31' });
/**
 * Builds the parameter object that the handler passes to emr.runJobFlow:
 * a cluster definition with the Spark application and a single spark-submit step.
 *
 * @param {string} s3_input_path - Forwarded to the submitted jar as the argument after the jar path.
 * @param {string} s3_output_path - Forwarded to the submitted jar as the final argument.
 * @returns {Object} The assembled configuration object.
 */
const emrClusterConfig = (s3_input_path, s3_output_path) => {
const ret = {
Name:`cluster-for-job`,
// NOTE(review): presumably the account's default EMR roles — verify they exist.
ServiceRole: 'EMR_DefaultRole',
JobFlowRole: 'EMR_EC2_DefaultRole',
VisibleToAllUsers: true,
ScaleDownBehavior: 'TERMINATE_AT_TASK_COMPLETION',
LogUri: 's3n://log-uri/elasticmapreduce/',
ReleaseLabel: 'emr-5.29.0',
Instances:{
InstanceGroups: [
{
// Single on-demand m5.xlarge master with two 32 GB gp2 volumes.
Name: 'Master Instance Group',
Market: 'ON_DEMAND',
InstanceRole: 'MASTER',
InstanceType: 'm5.xlarge',
InstanceCount: 1,
EbsConfiguration: {
EbsBlockDeviceConfigs: [
{
VolumeSpecification: {
SizeInGB: 32,
VolumeType: 'gp2',
},
VolumesPerInstance: 2
},
]
},
},
{
Name: 'Core Instance Group',
// NOTE(review): the next line is a placeholder left by the author — the core
// group's settings were elided and it is not valid JavaScript as written.
{... similar to master ...}
}
],
Ec2KeyName: 'my-keys',
Ec2SubnetId: 'my-subnet-id',
EmrManagedSlaveSecurityGroup:'sg-slave-security-group',
EmrManagedMasterSecurityGroup:'sg-master-security-group',
// NOTE(review): with this set to false the cluster is expected to shut down once
// its steps finish — confirm against the RunJobFlow API reference.
KeepJobFlowAliveWhenNoSteps: false,
TerminationProtected: false
},
Applications:[
{
'Name': 'Spark'
},
],
// "spark" classification with no property overrides.
Configurations:[{
"Classification":"spark",
"Properties":{}
}],
// One step: script-runner.jar is given a spark-submit command line
// (cluster deploy mode) followed by the job jar and the two S3 paths.
Steps:[{
'Name': 'step',
'ActionOnFailure': 'TERMINATE_CLUSTER',
'HadoopJarStep': {
'Jar': 's3n://elasticmapreduce/libs/script-runner/script-runner.jar',
'Args': [
"/usr/bin/spark-submit", "--deploy-mode", "cluster",
's3://path-to-a-very-humble.jar', s3_input_path, s3_output_path
]
}
}],
}
return ret
}
exports.handler = async (event, context) => {
const record = event.Records[0];
const eventName = record.eventName;
if(eventName === 'ObjectCreated:Put' || eventName === 'ObjectCreated:Post' || eventName === 'ObjectCreated:CompleteMultipartUpload' || eventName === 'ObjectCreated:Copy'){
const s3_inputPath = 's3n://in-bucket/key';
const s3_outputPath = 's3n://out-bucket/key';
try{
const cluster_config = emrClusterConfig(s3_inputPath,s3_outputPath);
const AWS_EMRJobRequest = emr.runJobFlow(cluster_config)
AWS_EMRJobRequest
.on('success', function(response){ console.log("success => " + response)})
.on('error', function(response){ console.log("error => " + response)})
.on('complete', function(response){ console.log("complete => " + response)})
.send( function(err, data){
if (err) console.log(err, err.stack); // an error occurred
else console.log(data); // successful response
context.done(null,'λ Completed');
});
console.log('Finished Launching EMR cluster: ', AWS_EMRJobRequest)
}
catch(err){
console.log(err);
}
}
else{
console.log(`:: not interested in event ${eventName}`);
}
context.done(null, 'λ Completed');
};
我以前手动设置过这些集群,它们运行良好。我从 AWS CLI Export 中的信息复制了集群配置,以匹配我现有集群的设置。
这什么也没做,只是在最后 "Finished Launching EMR Cluster" 记录了请求对象,但什么也没发生。
AWS 会在收到响应之前终止函数,因为 AWSRequest 发出的是异步请求。由于您使用的是异步(async)处理程序,因此可以使用 AWS.Request.promise()。它会立即发起服务调用并返回一个 Promise:该 Promise 要么以响应的 data 属性兑现(fulfilled),要么以响应的 error 属性被拒绝(rejected)。
// Build the RunJobFlow request, then return its promise from the async
// handler so the function does not finish before the request settles.
let AWS_EMRJobRequest = emr.runJobFlow(cluster_config);
return AWS_EMRJobRequest.promise();
更多信息请参阅相关文档。
我正在尝试从 lambda 启动 EMR 集群,该 lambda 由 S3 存储桶中的事件触发。
触发器工作正常,然后我使用 runJobFlow 创建了一个 AWSRequest,但是 EMR 控制台中没有任何事件,集群也没有启动。
包括创建的 AWSRequest 在内的所有内容都已记录到 CloudWatch 日志中,但没有记录任何错误。
它什么都不做
代码如下:
const aws = require('aws-sdk');
// Module-level EMR service client, pinned to the 2009-03-31 API version
// and the us-east-1 region.
const emr = new aws.EMR({ region: 'us-east-1', apiVersion: '2009-03-31' });
/**
 * Builds the parameter object that the handler passes to emr.runJobFlow:
 * a cluster definition with the Spark application and a single spark-submit step.
 *
 * @param {string} s3_input_path - Forwarded to the submitted jar as the argument after the jar path.
 * @param {string} s3_output_path - Forwarded to the submitted jar as the final argument.
 * @returns {Object} The assembled configuration object.
 */
const emrClusterConfig = (s3_input_path, s3_output_path) => {
const ret = {
Name:`cluster-for-job`,
// NOTE(review): presumably the account's default EMR roles — verify they exist.
ServiceRole: 'EMR_DefaultRole',
JobFlowRole: 'EMR_EC2_DefaultRole',
VisibleToAllUsers: true,
ScaleDownBehavior: 'TERMINATE_AT_TASK_COMPLETION',
LogUri: 's3n://log-uri/elasticmapreduce/',
ReleaseLabel: 'emr-5.29.0',
Instances:{
InstanceGroups: [
{
// Single on-demand m5.xlarge master with two 32 GB gp2 volumes.
Name: 'Master Instance Group',
Market: 'ON_DEMAND',
InstanceRole: 'MASTER',
InstanceType: 'm5.xlarge',
InstanceCount: 1,
EbsConfiguration: {
EbsBlockDeviceConfigs: [
{
VolumeSpecification: {
SizeInGB: 32,
VolumeType: 'gp2',
},
VolumesPerInstance: 2
},
]
},
},
{
Name: 'Core Instance Group',
// NOTE(review): the next line is a placeholder left by the author — the core
// group's settings were elided and it is not valid JavaScript as written.
{... similar to master ...}
}
],
Ec2KeyName: 'my-keys',
Ec2SubnetId: 'my-subnet-id',
EmrManagedSlaveSecurityGroup:'sg-slave-security-group',
EmrManagedMasterSecurityGroup:'sg-master-security-group',
// NOTE(review): with this set to false the cluster is expected to shut down once
// its steps finish — confirm against the RunJobFlow API reference.
KeepJobFlowAliveWhenNoSteps: false,
TerminationProtected: false
},
Applications:[
{
'Name': 'Spark'
},
],
// "spark" classification with no property overrides.
Configurations:[{
"Classification":"spark",
"Properties":{}
}],
// One step: script-runner.jar is given a spark-submit command line
// (cluster deploy mode) followed by the job jar and the two S3 paths.
Steps:[{
'Name': 'step',
'ActionOnFailure': 'TERMINATE_CLUSTER',
'HadoopJarStep': {
'Jar': 's3n://elasticmapreduce/libs/script-runner/script-runner.jar',
'Args': [
"/usr/bin/spark-submit", "--deploy-mode", "cluster",
's3://path-to-a-very-humble.jar', s3_input_path, s3_output_path
]
}
}],
}
return ret
}
exports.handler = async (event, context) => {
const record = event.Records[0];
const eventName = record.eventName;
if(eventName === 'ObjectCreated:Put' || eventName === 'ObjectCreated:Post' || eventName === 'ObjectCreated:CompleteMultipartUpload' || eventName === 'ObjectCreated:Copy'){
const s3_inputPath = 's3n://in-bucket/key';
const s3_outputPath = 's3n://out-bucket/key';
try{
const cluster_config = emrClusterConfig(s3_inputPath,s3_outputPath);
const AWS_EMRJobRequest = emr.runJobFlow(cluster_config)
AWS_EMRJobRequest
.on('success', function(response){ console.log("success => " + response)})
.on('error', function(response){ console.log("error => " + response)})
.on('complete', function(response){ console.log("complete => " + response)})
.send( function(err, data){
if (err) console.log(err, err.stack); // an error occurred
else console.log(data); // successful response
context.done(null,'λ Completed');
});
console.log('Finished Launching EMR cluster: ', AWS_EMRJobRequest)
}
catch(err){
console.log(err);
}
}
else{
console.log(`:: not interested in event ${eventName}`);
}
context.done(null, 'λ Completed');
};
我以前手动设置过这些集群,它们运行良好。我从 AWS CLI Export 中的信息复制了集群配置,以匹配我现有集群的设置。
这什么也没做,只是在最后 "Finished Launching EMR Cluster" 记录了请求对象,但什么也没发生。
AWS 会在收到响应之前终止函数,因为 AWSRequest 发出的是异步请求。由于您使用的是异步(async)处理程序,因此可以使用 AWS.Request.promise()。它会立即发起服务调用并返回一个 Promise:该 Promise 要么以响应的 data 属性兑现(fulfilled),要么以响应的 error 属性被拒绝(rejected)。
let AWS_EMRJobRequest = emr.runJobFlow(cluster_config);
return AWS_EMRJobRequest.promise();
更多信息请参阅相关文档。