如何从 node.js 中的 Amazon S3 存储桶同步下载文件

Question

我必须使用 node.js 从 S3 存储桶下载多个文件。为此，我必须编写 for loop 并调用 s3.getObject(param) 方法进行下载。下载文件后，我必须合并它们的内容。

我是这样写的：

var fileContentList = new ArrayList();

for(i=0; i<fileNameList.length i++){
    s3.getObject({ Bucket: "my-bucket", Key: fileNameList.get(i) }, function (error, data) {
    if (error != null) {
      alert("Failed to retrieve an object: " + error);
    } else {
      alert("Loaded " + data.ContentLength + " bytes");
      fileContentList.add(data.Body.toString());
    }
  }
);
}

//Do merging with the fileContentList.

但是由于 s3.getObject 是一个异步调用，当前线程继续运行并且在我进行合并时没有任何内容添加到 fileContentList。

我该如何解决这个问题？有什么想法吗？
他们在aws-sdk中有什么同步方法来下载文件吗？

Answer 1

Promises 是更好的方法，

var getObject = function(keyFile) {
    return new Promise(function(success, reject) {
        s3.getObject(
            { Bucket: "my-bucket", Key: keyFile },
            function (error, data) {
                if(error) {
                    reject(error);
                } else {
                    success(data);
                }
            }
        );
    });
}

var promises = [];
var fileContentList = new ArrayList();

for(i=0; i<fileNameList.length i++){
    promises.push(getObject(fileNameList.get(i)));
}

Promise.all(promises)
.then(function(results) {
    for(var index in results) {
        var data = results[index];
        fileContentList.add(data.Body.toString());
    }
    // continue your process here
})
.catch(function(err) {
    alert(err);
});

Answer 2

你可以在这里使用async each，它会并行下载所有文件。在此示例中，如果某些文件失败，下载将继续，如果您希望在发生错误时停止下载文件，请调用带有错误的回调，这将立即调用最终回调。

async documentation

var async = require('async');
var fileContentList = new ArrayList();
function downloadS3Multiple(done){
    async.each([
            function (callback) {
                s3.getObject({Bucket: "my-bucket", Key: fileNameList.get(i)}, function (err, res) {
                    if (err) {
                        alert("Failed to retrieve an object: " + error);
                        callback();
                    }
                    else {
                        alert("Loaded " + data.ContentLength + " bytes");
                        fileContentList.add(data.Body.toString());
                        callback();

                    }
                })
            }
        ], function (err, results) {
            done(err, fileContentList)
        });
}

Answer 3

在单独的列表中跟踪您启动的下载，并在每次下载完成时检查它们是否全部完成。

var fileContentList = new ArrayList();
var completedList = new ArrayList();
// function to setDone and initiate merge if all download attempts completed.
function setDone(i) {
    completedList[i]=true;
    var allDone= true;
    for(i=0; i<completedList.length && allDone=completedList[i] && allDone; i++);
    if(allDone) {
       mergeFiles();
    }
}

// fill completedList with a false value for each fill to be downloaded
for(i=0; i<fileNameList.length;i++) completedList.add(false);

// initiate the downloads
for(i=0; i<fileNameList.length; i++){
    s3.getObject({ Bucket: "my-bucket", Key: fileNameList.get(i) }, function (error, data) {
    if (error != null) {
      alert("Failed to retrieve an object: " + error);
    } else {
      alert("Loaded " + data.ContentLength + " bytes");
      fileContentList.add(data.Body.toString());
    }
    setDone(i);
  }
);
}

一个更优雅的解决方案，如果您只想在所有下载成功完成后合并文件：

var fileContentList = new ArrayList();

for(i=0; i<fileNameList.length i++){
    s3.getObject({ Bucket: "my-bucket", Key: fileNameList.get(i) }, function (error, data) {
    if (error != null) {
      alert("Failed to retrieve an object: " + error);
    } else {
      alert("Loaded " + data.ContentLength + " bytes");
      fileContentList.add(data.Body.toString());
    }
    if(fileContentList.length==fileNameList.length) combineFiles();
  }
);
}

Answer 4

我用这个解决了。虽然我没有尝试过 Alexander、Lena 和 Sébastian 的答案，但我相信他们提到的每个答案在这种情况下也适用。非常感谢他们的快速回复：

Async.eachSeries(casCustomersList, function (customerName, next){
        if(casCustomersList.length>0 && customerName != customerId) {

            var paramToAws = {
                Bucket: bucketName,
                Key: folderPath +'applicationContext-security-' + customerName + '.xml'      //file name
            };
            AWSFileAccessManager.downloadFile(paramToAws, function (error, result) {
                if (error) {
                    next(error);
                } else {
                    customerApplicationContext.add(result.Body.toString());
                    next();
                }

            });
        } else{
            next();
        }

    }, function(err) {
       //Write the rest of your logic here to process synchronously as it is the callback function

    }

如何从 node.js 中的 Amazon S3 存储桶同步下载文件

How to download a file from Amazon S3 bucket in node.js synchronously

javascript

amazon-s3

amazon-web-services

node.js

aws-sdk-nodejs