Multipart upload of zip file to AWS Glacier freezes midway
I'm trying to upload a 600MB .zip file to Glacier using the multipart upload feature of the Node version of the aws-sdk. I figured out how to read the file in as a buffer and start the upload using a script from the AWS docs.

The script starts uploading each part of the file, but then every part fails with a 400 error.
Uploading part 0 = bytes 0-2097151/*
Uploading part 2097152 = bytes 2097152-4194303/*
Uploading part 4194304 = bytes 4194304-6291455/*
Uploading part 6291456 = bytes 6291456-8388607/*
....
Uploading part 591396864 = bytes 591396864-591798963/*
// logging stops; a couple of seconds later, every upload part starts returning an error like this:
{ [UnknownError: 400]
message: '400',
code: 'UnknownError',
statusCode: 400,
time: Tue Jan 10 2017 20:54:29 GMT-0500 (EST),
requestId: 'F16FEDE011D3039A',
retryable: false,
retryDelay: 91.54012566432357 }
Here is the upload script I'm using:
var AWS = require('aws-sdk');
var creds = <path to creds>
var fs = require('fs');
var filePath = <path to file>;
var encoding = "utf8";

var myConfig = new AWS.Config({
  accessKeyId: creds.AccessKeyID,
  secretAccessKey: creds.SecretAccessKey,
  region: 'us-west-1'
});

var glacier = new AWS.Glacier(myConfig);
var buffer = fs.readFileSync(filePath);
// var buffer = new Buffer(2.5 * 1024 * 1024); // 2.5MB buffer
var partSize = 1024 * 1024; // 1MB chunks
var numPartsLeft = Math.ceil(buffer.length / partSize);
var startTime = new Date();
var params = {
  accountId: '-',
  vaultName: <vault name>,
  archiveDescription: '100media',
  partSize: partSize.toString(),
};

// Compute the complete SHA-256 tree hash so we can pass it
// to the completeMultipartUpload request at the end
var treeHash = glacier.computeChecksums(buffer).treeHash;

// Initiate the multipart upload
console.log('Initiating upload to', params.vaultName);
glacier.initiateMultipartUpload(params, function (mpErr, multipart) {
  if (mpErr) { console.log('Error!', mpErr.stack); return; }
  console.log("Got upload ID", multipart.uploadId);

  // Grab each partSize chunk and upload it as a part
  for (var i = 0; i < buffer.length; i += partSize) {
    var end = Math.min(i + partSize, buffer.length),
        partParams = {
          vaultName: params.vaultName,
          uploadId: multipart.uploadId,
          range: 'bytes ' + i + '-' + (end - 1) + '/*',
          body: buffer.slice(i, end)
        };

    // Send a single part
    console.log('Uploading part', i, '=', partParams.range);
    glacier.uploadMultipartPart(partParams, function (multiErr, mData) {
      if (multiErr) { console.log('Error uploading part:', multiErr); return; }
      console.log("Completed part", this.request.params.range);
      if (--numPartsLeft > 0) return; // complete only when all parts are uploaded

      var doneParams = {
        vaultName: params.vaultName,
        uploadId: multipart.uploadId,
        archiveSize: buffer.length.toString(),
        checksum: treeHash // the computed tree hash
      };

      console.log("Completing upload...");
      glacier.completeMultipartUpload(doneParams, function (err, data) {
        if (err) {
          console.log("An error occurred while uploading the archive");
          console.log(err);
        } else {
          var delta = (new Date() - startTime) / 1000;
          console.log('Completed upload in', delta, 'seconds');
          console.log('Archive ID:', data.archiveId);
          console.log('Checksum: ', data.checksum);
        }
      });
    });
  }
});
Any ideas on where the 400 errors are coming from would be greatly appreciated! I haven't worked with buffers or binary data before, so I may be messing up the formatting there. My other suspicion is that I'm simply formatting the Glacier request or its parameters incorrectly.
Here's a script I put together that attempts the multipart upload one part at a time. It could be reworked to be concurrent, but it works as-is, and it retries if a part upload fails:
var argv = require('minimist')(process.argv.slice(2));
var AWS = require('aws-sdk');
var creds = <path to local json creds>
var fs = require('fs');

var encoding = "utf8";
var partSize = 1024 * 1024; // 1MB chunks
var startTime = new Date();
var byteIncrementer = 0;
var MBcounter = 0;
var multipart;

// command-line args
var filePath = argv.filepath;
var vaultName = argv.vaultname;
var archiveDescription = argv.description;

if (!filePath) {
  throw "ERROR: must pass file path via --filepath <filepath>"
}
if (!vaultName) {
  throw "ERROR: must pass vault name via --vaultname <vaultname>"
}
if (!archiveDescription) {
  throw "ERROR: must pass description via --description <description>"
}

var myConfig = new AWS.Config({
  accessKeyId: creds.AccessKeyID,
  secretAccessKey: creds.SecretAccessKey,
  region: <region>
});

var params = {
  accountId: '-',
  vaultName: vaultName,
  archiveDescription: archiveDescription,
  partSize: partSize.toString(),
};

var buffer = fs.readFileSync(filePath);
var numPartsLeft = Math.ceil(buffer.length / partSize);
var glacier = new AWS.Glacier(myConfig);
var treeHash = glacier.computeChecksums(buffer).treeHash;

new Promise(function (resolve, reject) {
  glacier.initiateMultipartUpload(params, function (mpErr, multi) {
    if (mpErr) { console.log('Error!', mpErr.stack); return; }
    console.log("Got upload ID", multi.uploadId);
    multipart = multi;
    resolve();
  });
}).then(function () {
  console.log("total upload size: ", buffer.length);
  recursivelyUploadPart(byteIncrementer);
}).catch(function (err) { console.log(err); });

// Uploads the chunk starting at the module-level byteIncrementer;
// the argument passed by callers is unused.
function recursivelyUploadPart() {
  var end = Math.min(byteIncrementer + partSize, buffer.length);
  var partParams = {
    accountId: '-',
    uploadId: multipart.uploadId,
    vaultName: params.vaultName,
    range: 'bytes ' + byteIncrementer + '-' + (end - 1) + '/*',
    body: buffer.slice(byteIncrementer, end)
  };

  console.log('Uploading part', byteIncrementer, '=', partParams.range);
  glacier.uploadMultipartPart(partParams, function (multiErr, mData) {
    if (multiErr) {
      console.log('part upload error: ', multiErr);
      console.log('retrying');
      // byteIncrementer has not advanced, so this retries the same part
      return recursivelyUploadPart(byteIncrementer);
    } else {
      console.log("Completed part", this.request.params.range);
      if (--numPartsLeft > 0) {
        MBcounter++;
        console.log("MB Uploaded: ", MBcounter);
        byteIncrementer += partSize;
        console.log('recursing');
        return recursivelyUploadPart(byteIncrementer);
      } else {
        var doneParams = {
          vaultName: params.vaultName,
          uploadId: multipart.uploadId,
          archiveSize: buffer.length.toString(),
          checksum: treeHash // the computed tree hash
        };

        console.log("Completing upload...");
        glacier.completeMultipartUpload(doneParams, function (err, data) {
          if (err) {
            console.log("An error occurred while uploading the archive: ", err);
          } else {
            var delta = (new Date() - startTime) / 1000;
            console.log('Completed upload in', delta, 'seconds');
            console.log('Archive ID:', data.archiveId);
            console.log('Checksum: ', data.checksum);
            console.log("==============================");
            console.log('COMPLETED');
            console.log("==============================");
          }
        });
      }
    }
  });
};
As mentioned in the comments, it seems I was opening a huge number of HTTP connections and trying to upload everything at once, which doesn't work.
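If you do want some concurrency back, the problem above was unbounded parallelism rather than parallelism itself. Below is a minimal sketch of one way to cap it, assuming the same AWS, creds, glacier, buffer, and partSize variables as the scripts above. The uploadPartsWithLimit helper is hypothetical (mine, not part of the SDK); the httpOptions.agent setting with a Node https.Agent is the standard SDK v2 way to cap how many sockets it opens at once.

var https = require('https');

// Assumption: cap the sockets the SDK may open via httpOptions (SDK v2).
var limitedConfig = new AWS.Config({
  accessKeyId: creds.AccessKeyID,
  secretAccessKey: creds.SecretAccessKey,
  region: <region>,
  httpOptions: { agent: new https.Agent({ keepAlive: true, maxSockets: 4 }) }
});

// Hypothetical helper: keep at most `slots` part uploads in flight at once.
function uploadPartsWithLimit(glacier, vaultName, uploadId, buffer, partSize, slots, done) {
  var nextByte = 0;
  var partsLeft = Math.ceil(buffer.length / partSize);
  var finished = false;

  function startNext() {
    // Launch uploads until we hit the concurrency cap or the end of the buffer.
    while (slots > 0 && nextByte < buffer.length) {
      slots--;
      (function (start) {
        var end = Math.min(start + partSize, buffer.length);
        glacier.uploadMultipartPart({
          vaultName: vaultName,
          uploadId: uploadId,
          range: 'bytes ' + start + '-' + (end - 1) + '/*',
          body: buffer.slice(start, end)
        }, function (err) {
          slots++;
          if (finished) return; // done() was already called
          // No retries in this sketch: fail fast and abandon in-flight parts.
          if (err) { finished = true; return done(err); }
          if (--partsLeft === 0) { finished = true; return done(null); }
          startNext(); // a slot freed up; start the next part
        });
      })(nextByte);
      nextByte += partSize;
    }
  }
  startNext();
}

You would call uploadPartsWithLimit(glacier, params.vaultName, multipart.uploadId, buffer, partSize, 4, ...) from the initiateMultipartUpload callback in place of the sequential recursion, then run completeMultipartUpload from its done callback exactly as before.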