How to copy/move all objects in Amazon S3 from one prefix to other using the AWS SDK for Node.js

How do I copy all objects from one prefix to another? I have tried every way I can think of to copy all the objects from one prefix to another in one shot, but the only approach that seems to work is iterating over the list of objects and copying them one by one. That is really inefficient. If I have hundreds of files in a folder, do I really need to make 100 calls?

var params = {
  Bucket: bucket,
  CopySource: bucket + '/' + oldDirName + '/filename.txt',
  Key: newDirName + '/filename.txt'
};
s3.copyObject(params, function(err, data) {
  if (err) {
      callback.apply(this, [{
          type: "error",
          message: "Error while renaming Directory",
          data: err
      }]);
  } else {
      callback.apply(this, [{
          type: "success",
          message: "Directory renamed successfully",
          data: data
      }]);
  }
});

You will need to make one AWS.S3.listObjects() call to list your objects with a specific prefix. But you are correct that you will need to make one call for every object that you want to copy from one bucket/prefix to the same or another bucket/prefix.

You can also use a utility library like async to manage your requests.

var AWS = require('aws-sdk');
var async = require('async');
var bucketName = 'foo';
var oldPrefix = 'abc/';
var newPrefix = 'xyz/';
var s3 = new AWS.S3({params: {Bucket: bucketName}, region: 'us-west-2'});

var done = function(err, data) {
  if (err) console.log(err);
  else console.log(data);
};

// List every object under the old prefix, then copy each one under the new prefix
s3.listObjects({Prefix: oldPrefix}, function(err, data) {
  if (err) return done(err);

  if (data.Contents.length) {
    async.each(data.Contents, function(file, cb) {
      var params = {
        Bucket: bucketName,
        CopySource: bucketName + '/' + file.Key,
        Key: file.Key.replace(oldPrefix, newPrefix)
      };
      s3.copyObject(params, function(copyErr, copyData){
        if (copyErr) {
          console.log(copyErr);
        }
        else {
          console.log('Copied: ', params.Key);
          cb();
        }
      });
    }, done);
  }
});

Hope that helps!

A small change to Aditya Manohar's code that improves the error handling in the s3.copyObject function and actually completes the "move" by deleting the source files once the copy requests have finished:

const AWS = require('aws-sdk');
const async = require('async');
const bucketName = 'foo';
const oldPrefix = 'abc/';
const newPrefix = 'xyz/';

const s3 = new AWS.S3({
    params: {
        Bucket: bucketName
    },
    region: 'us-west-2'
});


// 1) List all the objects in the source "directory"
s3.listObjects({
    Prefix: oldPrefix
}, function (err, data) {

    if (err) return console.log(err);

    if (data.Contents.length) {

        // Build up the parameters for the delete statement
        let paramsS3Delete = {
            Bucket: bucketName,
            Delete: {
                Objects: []
            }
        };

        // Expand the array with all the keys that we have found in the ListObjects function call, so that we can remove all the keys at once after we have copied all the keys
        data.Contents.forEach(function (content) {
            paramsS3Delete.Delete.Objects.push({
                Key: content.Key
            });
        });

        // 2) Copy all the source files to the destination
        async.each(data.Contents, function (file, cb) {
            var params = {
                CopySource: bucketName + '/' + file.Key,
                Key: file.Key.replace(oldPrefix, newPrefix)
            };
            s3.copyObject(params, function (copyErr, copyData) {

                if (copyErr) {
                    console.log(copyErr);
                } else {
                    console.log('Copied: ', params.Key);
                }
                cb();
            });
        }, function (asyncError, asyncData) {
            // All the requests for the file copy have finished
            if (asyncError) {
                return console.log(asyncError);
            } else {
                console.log(asyncData);

                // 3) Now remove the source files - that way we effectively moved all the content
                s3.deleteObjects(paramsS3Delete, (deleteError, deleteData) => {
                    if (deleteError) return console.log(deleteError);

                    return console.log(deleteData);
                })

            }
        });
    }
});

Note that I have moved the cb() callback outside of the if-then-else block, so that the async module fires the final (done) callback even when an error occurs.
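
If you would rather stop on the first failed copy instead of just logging it and continuing, async.each also accepts an error passed to the per-item callback. A minimal variation of the copy step above (using the same params object) could look like:

s3.copyObject(params, function (copyErr, copyData) {
    if (copyErr) {
        // Passing the error to cb() makes async.each stop and invoke the
        // final callback with this error instead of continuing
        return cb(copyErr);
    }
    console.log('Copied: ', params.Key);
    cb();
});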

Here is a code snippet that does it the "async await" way:

const AWS = require('aws-sdk');
AWS.config.update({
  credentials: new AWS.Credentials(....), // credential parameters
});
AWS.config.setPromisesDependency(require('bluebird'));
const s3 = new AWS.S3();

... ...

const bucketName = 'bucketName';        // example bucket
const folderToMove = 'folderToMove/';   // old folder name
const destinationFolder = 'destinationFolder/'; // new destination folder 
try {
    const listObjectsResponse = await s3.listObjects({
        Bucket: bucketName,
        Prefix: folderToMove,
        Delimiter: '/',
    }).promise();

    const folderContentInfo = listObjectsResponse.Contents;
    const folderPrefix = listObjectsResponse.Prefix;

    await Promise.all(
      folderContentInfo.map(async (fileInfo) => {
        await s3.copyObject({
          Bucket: bucketName,
          CopySource: `${bucketName}/${fileInfo.Key}`,  // old file Key
          Key: `${destinationFolder}${fileInfo.Key.replace(folderPrefix, '')}`, // new file Key (destinationFolder already ends with '/')
        }).promise();
    
        await s3.deleteObject({
          Bucket: bucketName,
          Key: fileInfo.Key,
        }).promise();
      })
    );
} catch (err) {
  console.error(err); // error handling
}

A few more updates to the original code, for copying folders recursively. Some limitations: the code cannot handle more than 1000 objects per prefix, and of course, if your folders are very deep, there is also a recursion depth limit.

import AWS from 'aws-sdk';

AWS.config.update({ region: 'ap-southeast-1' });

/**
 * Copy an S3 "folder" (prefix) recursively
 * @param {string} bucket the bucket name
 * @param {string} source the source prefix, must end with '/'
 * @param {string} dest the destination prefix, must end with '/'
 * @returns {Promise} resolves once all objects have been copied
 */
export default async function s3CopyFolder(bucket, source, dest) {
  // sanity check: source and dest must end with '/'
  if (!source.endsWith('/') || !dest.endsWith('/')) {
    return Promise.reject(new Error('source or dest must end with a forward slash'));
  }

  const s3 = new AWS.S3();

  // plan, list through the source, if got continuation token, recursive
  const listResponse = await s3.listObjectsV2({
    Bucket: bucket,
    Prefix: source,
    Delimiter: '/',
  }).promise();

  // copy objects
  await Promise.all(
    listResponse.Contents.map(async (file) => {
      await s3.copyObject({
        Bucket: bucket,
        CopySource: `${bucket}/${file.Key}`,
        Key: `${dest}${file.Key.replace(listResponse.Prefix, '')}`,
      }).promise();
    }),
  );

  // recursive copy sub-folders
  await Promise.all(
    listResponse.CommonPrefixes.map(async (folder) => {
      await s3CopyFolder(
        bucket,
        `${folder.Prefix}`,
        `${dest}${folder.Prefix.replace(listResponse.Prefix, '')}`,
      );
    }),
  );

  return Promise.resolve('ok');
}
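
Usage would look something like this (a sketch; the module path, bucket, and prefix names below are placeholders):

import s3CopyFolder from './s3CopyFolder'; // hypothetical module path

// Both prefixes must end with '/', or the sanity check above rejects the call
await s3CopyFolder('my-bucket', 'abc/', 'xyz/');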

None of the answers above handles large directories, since the list-objects-v2 command returns no more than 1000 results at a time and provides a continuation token to access additional "pages".

Here is a solution using the modern v3 SDK:

const {
  CopyObjectCommand,
  DeleteObjectCommand,
  ListObjectsV2Command,
} = require('@aws-sdk/client-s3');

const copyAll = async ({
  s3Client,
  sourceBucket,
  targetBucket = sourceBucket,
  sourcePrefix,
  targetPrefix,
  concurrency = 1,
  deleteSource = false,
}) => {
  let ContinuationToken;

  const copyFile = async (sourceKey) => {
    const targetKey = sourceKey.replace(sourcePrefix, targetPrefix);

    await s3Client.send(
      new CopyObjectCommand({
        Bucket: targetBucket,
        Key: targetKey,
        CopySource: `${sourceBucket}/${sourceKey}`,
      }),
    );

    if (deleteSource) {
      await s3Client.send(
        new DeleteObjectCommand({
          Bucket: sourceBucket,
          Key: sourceKey,
        }),
      );
    }
  };

  do {
    const { Contents = [], NextContinuationToken } = await s3Client.send(
      new ListObjectsV2Command({
        Bucket: sourceBucket,
        Prefix: sourcePrefix,
        ContinuationToken,
      }),
    );

    const sourceKeys = Contents.map(({ Key }) => Key);

    await Promise.all(
      new Array(concurrency).fill(null).map(async () => {
        while (sourceKeys.length) {
          await copyFile(sourceKeys.pop());
        }
      }),
    );

    ContinuationToken = NextContinuationToken;
  } while (ContinuationToken);
};

If the Promise.all part isn't clear, it's just a poor man's "thread pool" that lets you copy multiple files concurrently, which can speed things up considerably. The copies don't consume any of your own bandwidth, since the content is copied within AWS, so I have had no problem with a concurrency of 20 or more. For clarity, it is just a parallelized version of the following:

const sourceKeys = Contents.map(({ Key }) => Key);

while (sourceKeys.length) {
  await copyFile(sourceKeys.pop());
}
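
For reference, a call might look something like this (a sketch; the region, bucket, and prefix names below are placeholders):

const { S3Client } = require('@aws-sdk/client-s3');

// Placeholder region, bucket, and prefixes -- substitute your own
const s3Client = new S3Client({ region: 'us-east-1' });

await copyAll({
  s3Client,
  sourceBucket: 'my-bucket',
  sourcePrefix: 'abc/',
  targetPrefix: 'xyz/',
  concurrency: 20,
  deleteSource: true, // move rather than copy
});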

This is the method I use to move multiple objects.

const aws = require('aws-sdk')

// Run an async callback for each element of the array in sequence
const asyncForEach = async (array, callback) => {
  for (let i = 0; i < array.length; i++) {
    await callback(array[i], i, array)
  }
}

const awsMove = async ({ files }) => {
  try {
    const s3 = new aws.S3()
    const AWS_BUCKET = 'bucket'

    await asyncForEach(files, async file => {
      // Copy the object to its new key, then delete the original to complete the "move"
      const copyParams = {
        Key: file.newPath,
        ACL: 'public-read',
        Bucket: AWS_BUCKET,
        CopySource: encodeURI(`/${AWS_BUCKET}/${file.oldPath}`)
      }
      await s3.copyObject(copyParams).promise()

      const deleteParams = {
        Key: file.oldPath,
        Bucket: AWS_BUCKET
      }
      await s3.deleteObject(deleteParams).promise()
    })
  } catch (err) {
    console.log(err)
  }
}

const files = [
  { oldPath: 'folder/file', newPath: 'folder-copy/file' },
  { oldPath: 'another-folder/file', newPath: 'another-folder-copy/file' }
]
await awsMove({ files })