将数据从 Azure Blob 存储复制到 AWS S3
Copy Data From Azure Blob Storage to AWS S3
我是 Azure 数据工厂的新手,有一个有趣的要求。
我需要将文件从 Azure Blob 存储移动到 Amazon S3,最好使用 Azure 数据工厂。
但是 S3 不支持作为接收器;
https://docs.microsoft.com/en-us/azure/data-factory/copy-activity-overview
我还从我在这里阅读的各种评论中了解到,您不能直接从 Blob 存储复制到 S3 - 您需要在本地下载文件,然后将其上传到 S3。
有谁知道数据工厂、SSIS 或 Azure Runbook 中的任何示例可以做这样的事情,我想一个选择是编写一个 azure logic-app 或从数据工厂调用的函数.
设法解决了这个问题 - 它可能对其他人有用。
我决定编写一个使用 HTTP 请求作为触发器的 azure 函数。
这两个帖子对我帮助很大;
Copy from Azure Blob to AWS S3 using C#
如果您使用的是 Azure 函数,请注意我对 Nuget 包的回答 2.x。
这是代码 - 您可以根据需要修改它的基础。
我 return 一个 JSON 序列化对象,因为 Azure 数据工厂需要它作为从管道发送的 http 请求的响应;
#r "Microsoft.WindowsAzure.Storage"
#r "Newtonsoft.Json"
#r "System.Net.Http"
using System.Net;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Primitives;
using Newtonsoft.Json;
using Microsoft.WindowsAzure.Storage.Blob;
using System.Net.Http;
using Amazon.S3;
using Amazon.S3.Model;
using Amazon.S3.Transfer;
using Amazon.S3.Util;
public static async Task<IActionResult> Run(HttpRequest req, ILogger log)
{
log.LogInformation("Example Function has recieved a HTTP Request");
// get Params from query string
string blobUri = req.Query["blobUri"];
string bucketName = req.Query["bucketName"];
// Validate query string
if (String.IsNullOrEmpty(blobUri) || String.IsNullOrEmpty(bucketName)) {
Result outcome = new Result("Invalid Parameters Passed to Function",false,"blobUri or bucketName is null or empty");
return new BadRequestObjectResult(outcome.ConvertResultToJson());
}
// cast the blob to its type
Uri blobAbsoluteUri = new Uri(blobUri);
CloudBlockBlob blob = new CloudBlockBlob(blobAbsoluteUri);
// Do the Copy
bool resultBool = await CopyBlob(blob, bucketName, log);
if (resultBool) {
Result outcome = new Result("Copy Completed",true,"Blob: " + blobUri + " Copied to Bucket: " + bucketName);
return (ActionResult)new OkObjectResult(outcome.ConvertResultToJson());
}
else {
Result outcome = new Result("ERROR",false,"Copy was not successful Please review Application Logs");
return new BadRequestObjectResult(outcome.ConvertResultToJson());
}
}
static async Task<bool> CopyBlob(CloudBlockBlob blob, string existingBucket, ILogger log) {
var accessKey = "myAwsKey";
var secretKey = "myAwsSecret";
var keyName = blob.Name;
// Make the client
AmazonS3Client myClient = new AmazonS3Client(accessKey, secretKey, Amazon.RegionEndpoint.EUWest1);
// Check the Target Bucket Exists;
bool bucketExists = await AmazonS3Util.DoesS3BucketExistAsync (myClient,existingBucket);
if (!bucketExists) {
log.LogInformation("Bucket: " + existingBucket + " does not exist or is inaccessible to the application");
return false;
}
// Set up the Transfer Utility
TransferUtility fileTransferUtility = new TransferUtility(myClient);
// Stream the file
try {
log.LogInformation("Starting Copy");
using (var stream = await blob.OpenReadAsync()) {
// Note: You need permissions to not be private on the source blob
log.LogInformation("Streaming");
await fileTransferUtility.UploadAsync(stream,existingBucket,keyName);
log.LogInformation("Streaming Done");
}
log.LogInformation("Copy completed");
}
catch (AmazonS3Exception e) {
log.LogInformation("Error encountered on server. Message:'{0}' when writing an object", e.Message);
}
catch (Exception e) {
log.LogInformation("Unknown encountered on server. Message:'{0}' when writing an object", e.Message);
return false;
}
return true;
}
public class Result {
public string result;
public bool outcome;
public string UTCtime;
public string details;
public Result(string msg, bool outcomeBool, string fullMsg){
result=msg;
UTCtime=DateTime.Now.ToString("yyyy-MM-dd h:mm:ss tt");
outcome=outcomeBool;
details=fullMsg;
}
public string ConvertResultToJson() {
return JsonConvert.SerializeObject(this);
}
}
使用 AzCopy 从 Azure 存储下载文件到临时本地存储库
您可以将文件从Azure Cloud storage下载到您的本地系统,只需按照以下命令,使用递归标志复制所有文件
azcopy /Source:[source_container_url] /Dest:[local_file_path] /Sourcekey:[source_storage_account_access_key] /s
使用aws s3 cp命令将本地文件上传到Amazon S3
aws s3 cp local_file_path s3://my-bucket/ --recursive
我是 Azure 数据工厂的新手,有一个有趣的要求。
我需要将文件从 Azure Blob 存储移动到 Amazon S3,最好使用 Azure 数据工厂。
但是 S3 不支持作为接收器;
https://docs.microsoft.com/en-us/azure/data-factory/copy-activity-overview
我还从我在这里阅读的各种评论中了解到,您不能直接从 Blob 存储复制到 S3 - 您需要在本地下载文件,然后将其上传到 S3。
有谁知道数据工厂、SSIS 或 Azure Runbook 中的任何示例可以做这样的事情,我想一个选择是编写一个 azure logic-app 或从数据工厂调用的函数.
设法解决了这个问题 - 它可能对其他人有用。
我决定编写一个使用 HTTP 请求作为触发器的 azure 函数。
这两个帖子对我帮助很大;
Copy from Azure Blob to AWS S3 using C#
如果您使用的是 Azure 函数,请注意我对 Nuget 包的回答 2.x。
这是代码 - 您可以根据需要修改它的基础。 我 return 一个 JSON 序列化对象,因为 Azure 数据工厂需要它作为从管道发送的 http 请求的响应;
#r "Microsoft.WindowsAzure.Storage"
#r "Newtonsoft.Json"
#r "System.Net.Http"
using System.Net;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Primitives;
using Newtonsoft.Json;
using Microsoft.WindowsAzure.Storage.Blob;
using System.Net.Http;
using Amazon.S3;
using Amazon.S3.Model;
using Amazon.S3.Transfer;
using Amazon.S3.Util;
public static async Task<IActionResult> Run(HttpRequest req, ILogger log)
{
log.LogInformation("Example Function has recieved a HTTP Request");
// get Params from query string
string blobUri = req.Query["blobUri"];
string bucketName = req.Query["bucketName"];
// Validate query string
if (String.IsNullOrEmpty(blobUri) || String.IsNullOrEmpty(bucketName)) {
Result outcome = new Result("Invalid Parameters Passed to Function",false,"blobUri or bucketName is null or empty");
return new BadRequestObjectResult(outcome.ConvertResultToJson());
}
// cast the blob to its type
Uri blobAbsoluteUri = new Uri(blobUri);
CloudBlockBlob blob = new CloudBlockBlob(blobAbsoluteUri);
// Do the Copy
bool resultBool = await CopyBlob(blob, bucketName, log);
if (resultBool) {
Result outcome = new Result("Copy Completed",true,"Blob: " + blobUri + " Copied to Bucket: " + bucketName);
return (ActionResult)new OkObjectResult(outcome.ConvertResultToJson());
}
else {
Result outcome = new Result("ERROR",false,"Copy was not successful Please review Application Logs");
return new BadRequestObjectResult(outcome.ConvertResultToJson());
}
}
static async Task<bool> CopyBlob(CloudBlockBlob blob, string existingBucket, ILogger log) {
var accessKey = "myAwsKey";
var secretKey = "myAwsSecret";
var keyName = blob.Name;
// Make the client
AmazonS3Client myClient = new AmazonS3Client(accessKey, secretKey, Amazon.RegionEndpoint.EUWest1);
// Check the Target Bucket Exists;
bool bucketExists = await AmazonS3Util.DoesS3BucketExistAsync (myClient,existingBucket);
if (!bucketExists) {
log.LogInformation("Bucket: " + existingBucket + " does not exist or is inaccessible to the application");
return false;
}
// Set up the Transfer Utility
TransferUtility fileTransferUtility = new TransferUtility(myClient);
// Stream the file
try {
log.LogInformation("Starting Copy");
using (var stream = await blob.OpenReadAsync()) {
// Note: You need permissions to not be private on the source blob
log.LogInformation("Streaming");
await fileTransferUtility.UploadAsync(stream,existingBucket,keyName);
log.LogInformation("Streaming Done");
}
log.LogInformation("Copy completed");
}
catch (AmazonS3Exception e) {
log.LogInformation("Error encountered on server. Message:'{0}' when writing an object", e.Message);
}
catch (Exception e) {
log.LogInformation("Unknown encountered on server. Message:'{0}' when writing an object", e.Message);
return false;
}
return true;
}
public class Result {
public string result;
public bool outcome;
public string UTCtime;
public string details;
public Result(string msg, bool outcomeBool, string fullMsg){
result=msg;
UTCtime=DateTime.Now.ToString("yyyy-MM-dd h:mm:ss tt");
outcome=outcomeBool;
details=fullMsg;
}
public string ConvertResultToJson() {
return JsonConvert.SerializeObject(this);
}
}
使用 AzCopy 从 Azure 存储下载文件到临时本地存储库
您可以将文件从Azure Cloud storage下载到您的本地系统,只需按照以下命令,使用递归标志复制所有文件
azcopy /Source:[source_container_url] /Dest:[local_file_path] /Sourcekey:[source_storage_account_access_key] /s
使用aws s3 cp命令将本地文件上传到Amazon S3
aws s3 cp local_file_path s3://my-bucket/ --recursive