[aws-sdk-cpp][s3] 使用 getObject 下载二进制文件
[aws-sdk-cpp][s3] Downloading binary file with getObject
我正在尝试编写一些代码来从 AWS S3 服务器下载二进制文件。
我写了下面这段代码,对于 200MB 左右的二进制文件看起来没有问题,所以我认为它是可行的。
但是对于超过 200MB 的大文件,虽然能够下载,却只下载了文件的前一部分。
例如,一个 1.2GB 的视频文件只下载了前面的一部分(460MB ~ 700MB)。
为什么会这样?这与 ofstream 的特性有关吗?
// 3. file download from s3
//
// Downloads the S3 object whose key is hThis->m_strTargetPath into a file in
// the current working directory. The file is named after the last non-empty
// '/'-separated component of the key. On success the file name and its full
// path are stored in hThis; on failure the error is logged and runSignal is
// set to STAT_RUN_STOP.
//
// NOTE(review): this variant lets GetObject use the SDK's default response
// stream and copies the body to disk only after the request has returned.
// The default stream presumably holds the whole object in memory, which would
// explain incomplete files for very large objects - confirm against the SDK
// documentation. The byte-count check below reports such a truncation instead
// of logging success.
{
    string strTargetPath = hThis->m_strTargetPath;
    Aws::SDKOptions options;
    options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
    Aws::InitAPI(options);
    {
        // Inner scope: the client, request and outcome are destroyed before
        // Aws::ShutdownAPI runs.
        const char *bucket_name = "mybucket";
        Aws::Client::ClientConfiguration clientConfig;
        clientConfig.region = "ap-northeast-2";
        Aws::S3::S3Client s3_client(clientConfig);
        Aws::S3::Model::GetObjectRequest object_request;
        object_request.WithBucket(bucket_name).WithKey(strTargetPath.c_str());

        // Parse the file name from the path without fixed-size buffers:
        // skip trailing slashes, then take everything after the previous '/'.
        string fileName;
        const string::size_type nameEnd = strTargetPath.find_last_not_of('/');
        if (nameEnd != string::npos)
        {
            const string::size_type slash = strTargetPath.rfind('/', nameEnd);
            const string::size_type nameBegin = (slash == string::npos) ? 0 : slash + 1;
            fileName = strTargetPath.substr(nameBegin, nameEnd - nameBegin + 1);
        }

        if (fileName.empty())
        {
            // Empty key or a key made only of '/': there is nothing to name the file after.
            hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: target path has no file name\n");
            hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
        }
        else
        {
            auto get_object_outcome = s3_client.GetObject(object_request);
            if (get_object_outcome.IsSuccess())
            {
                // Store the name in the member string and point m_origFileName
                // at that copy. Pointing it at a local string's buffer would
                // dangle once the local is destroyed (assuming m_origFileName
                // is a raw pointer - TODO confirm).
                hThis->m_origFileNameString = fileName;
                hThis->m_origFileName = hThis->m_origFileNameString.c_str();

                auto &result = get_object_outcome.GetResult();
                const long long expectedBytes = result.GetContentLength();

                // Writing file downloaded
                Aws::OFStream local_file;
                local_file.open(hThis->m_origFileNameString.c_str(), std::ios::out | std::ios::binary);
                if (expectedBytes > 0)
                {
                    // operator<<(streambuf*) sets failbit when it copies nothing,
                    // so an empty object must not go through it.
                    local_file << result.GetBody().rdbuf();
                }
                // tellp() yields -1 when the stream has failed (e.g. open error).
                const std::streamoff writtenBytes = local_file.tellp();
                local_file.close();

                if (!local_file || static_cast<long long>(writtenBytes) != expectedBytes)
                {
                    hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: wrote " +
                        std::to_string(static_cast<long long>(writtenBytes)) + " of " +
                        std::to_string(expectedBytes) + " bytes\n");
                    hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
                }
                else
                {
                    hThis->Logger(CPrePackagerDlg::currentDateTime() + "download is done\n");
                    // Full path of the downloaded file: current directory + '\' + file name.
                    TCHAR programpath[_MAX_PATH];
                    GetCurrentDirectory(_MAX_PATH, programpath);
                    hThis->m_valOriginFolderPath.Format(_T("%s\\"), programpath);
                    hThis->m_valOriginFolderPath += hThis->m_origFileName;
                }
            }
            else
            {
                hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: " +
                    get_object_outcome.GetError().GetExceptionName() + " " +
                    get_object_outcome.GetError().GetMessage() + "\n");
                hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
            }
        }
    }
    Aws::ShutdownAPI(options);
}
即使是现在,我也不知道为什么它不起作用。
但我将方法更改为如下所示并且有效。
这段代码会把下载到的数据块直接写入磁盘。
所以它不会使用那么多内存。(大约 10~30MB)
// 3. file download from s3
//
// Downloads the S3 object whose key is hThis->m_strTargetPath into a file in
// the current working directory. The file is named after the last non-empty
// '/'-separated component of the key. The request is given a response stream
// factory that returns a file stream, so the response body is backed by that
// file. On success the file name and its full path are stored in hThis; on
// failure the error is logged and runSignal is set to STAT_RUN_STOP.
{
    string strTargetPath = hThis->m_strTargetPath;
    Aws::SDKOptions options;
    options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
    Aws::InitAPI(options);
    {
        // Inner scope: the client, request and outcome are destroyed before
        // Aws::ShutdownAPI runs.
        const char *bucket_name = "nemodax-upload-dev";
        Aws::Client::ClientConfiguration clientConfig;
        clientConfig.region = "ap-northeast-2";
        Aws::S3::S3Client s3_client(clientConfig);
        Aws::S3::Model::GetObjectRequest object_request;
        object_request.WithBucket(bucket_name).WithKey(strTargetPath.c_str());

        // Parse the file name from the path without fixed-size buffers:
        // skip trailing slashes, then take everything after the previous '/'.
        string fileName;
        const string::size_type nameEnd = strTargetPath.find_last_not_of('/');
        if (nameEnd != string::npos)
        {
            const string::size_type slash = strTargetPath.rfind('/', nameEnd);
            const string::size_type nameBegin = (slash == string::npos) ? 0 : slash + 1;
            fileName = strTargetPath.substr(nameBegin, nameEnd - nameBegin + 1);
        }

        if (fileName.empty())
        {
            // Empty key or a key made only of '/': there is nothing to name the file after.
            hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: target path has no file name\n");
            hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
        }
        else
        {
            // While downloading, the stream is saved straight into the file
            // named by fileName, so the download does not use much memory.
            // Only fileName is captured (by copy) so the factory stays valid
            // independently of the surrounding locals.
            object_request.SetResponseStreamFactory(
                [fileName]() {
                    return Aws::New<Aws::FStream>("S3DOWNLOAD", fileName, std::ios_base::out | std::ios_base::binary);
                }
            );
            auto get_object_outcome = s3_client.GetObject(object_request);
            if (get_object_outcome.IsSuccess())
            {
                // Store the name in the member string and point m_origFileName
                // at that copy. Pointing it at a local string's buffer would
                // dangle once the local is destroyed (assuming m_origFileName
                // is a raw pointer - TODO confirm).
                hThis->m_origFileNameString = fileName;
                hThis->m_origFileName = hThis->m_origFileNameString.c_str();
                hThis->Logger(CPrePackagerDlg::currentDateTime() + "file size: " + std::to_string(get_object_outcome.GetResult().GetContentLength()) + "\n");
                hThis->Logger(CPrePackagerDlg::currentDateTime() + "download is done\n");
                // Register the path of the downloaded original file in a member
                // variable; the later encryption step refers to this path.
                // Full path = current directory + '\' + file name.
                TCHAR programpath[_MAX_PATH];
                GetCurrentDirectory(_MAX_PATH, programpath);
                hThis->m_valOriginFolderPath.Format(_T("%s\\"), programpath);
                hThis->m_valOriginFolderPath += hThis->m_origFileName;
            }
            else
            {
                hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: " +
                    get_object_outcome.GetError().GetExceptionName() + " " +
                    get_object_outcome.GetError().GetMessage() + "\n");
                hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
            }
        }
    }
    Aws::ShutdownAPI(options);
}
Even now, I don't know exactly why it doesn't work.
因为在您的初始示例中,它将整个文件存储在内存中。
在您的第二个代码片段中,您通过使用 fstream 作为响应流做了正确的事情。所以它现在立即写入磁盘。
我正在尝试编写一些代码来从 AWS S3 服务器下载二进制文件。
我写了下面这段代码,对于 200MB 左右的二进制文件看起来没有问题,所以我认为它是可行的。
但是对于超过 200MB 的大文件,虽然能够下载,却只下载了文件的前一部分。
例如,一个 1.2GB 的视频文件只下载了前面的一部分(460MB ~ 700MB)。
为什么会这样?这与 ofstream 的特性有关吗?
// 3. file download from s3
//
// Downloads the S3 object whose key is hThis->m_strTargetPath into a file in
// the current working directory. The file is named after the last non-empty
// '/'-separated component of the key. On success the file name and its full
// path are stored in hThis; on failure the error is logged and runSignal is
// set to STAT_RUN_STOP.
//
// NOTE(review): this variant lets GetObject use the SDK's default response
// stream and copies the body to disk only after the request has returned.
// The default stream presumably holds the whole object in memory, which would
// explain incomplete files for very large objects - confirm against the SDK
// documentation. The byte-count check below reports such a truncation instead
// of logging success.
{
    string strTargetPath = hThis->m_strTargetPath;
    Aws::SDKOptions options;
    options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
    Aws::InitAPI(options);
    {
        // Inner scope: the client, request and outcome are destroyed before
        // Aws::ShutdownAPI runs.
        const char *bucket_name = "mybucket";
        Aws::Client::ClientConfiguration clientConfig;
        clientConfig.region = "ap-northeast-2";
        Aws::S3::S3Client s3_client(clientConfig);
        Aws::S3::Model::GetObjectRequest object_request;
        object_request.WithBucket(bucket_name).WithKey(strTargetPath.c_str());

        // Parse the file name from the path without fixed-size buffers:
        // skip trailing slashes, then take everything after the previous '/'.
        string fileName;
        const string::size_type nameEnd = strTargetPath.find_last_not_of('/');
        if (nameEnd != string::npos)
        {
            const string::size_type slash = strTargetPath.rfind('/', nameEnd);
            const string::size_type nameBegin = (slash == string::npos) ? 0 : slash + 1;
            fileName = strTargetPath.substr(nameBegin, nameEnd - nameBegin + 1);
        }

        if (fileName.empty())
        {
            // Empty key or a key made only of '/': there is nothing to name the file after.
            hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: target path has no file name\n");
            hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
        }
        else
        {
            auto get_object_outcome = s3_client.GetObject(object_request);
            if (get_object_outcome.IsSuccess())
            {
                // Store the name in the member string and point m_origFileName
                // at that copy. Pointing it at a local string's buffer would
                // dangle once the local is destroyed (assuming m_origFileName
                // is a raw pointer - TODO confirm).
                hThis->m_origFileNameString = fileName;
                hThis->m_origFileName = hThis->m_origFileNameString.c_str();

                auto &result = get_object_outcome.GetResult();
                const long long expectedBytes = result.GetContentLength();

                // Writing file downloaded
                Aws::OFStream local_file;
                local_file.open(hThis->m_origFileNameString.c_str(), std::ios::out | std::ios::binary);
                if (expectedBytes > 0)
                {
                    // operator<<(streambuf*) sets failbit when it copies nothing,
                    // so an empty object must not go through it.
                    local_file << result.GetBody().rdbuf();
                }
                // tellp() yields -1 when the stream has failed (e.g. open error).
                const std::streamoff writtenBytes = local_file.tellp();
                local_file.close();

                if (!local_file || static_cast<long long>(writtenBytes) != expectedBytes)
                {
                    hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: wrote " +
                        std::to_string(static_cast<long long>(writtenBytes)) + " of " +
                        std::to_string(expectedBytes) + " bytes\n");
                    hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
                }
                else
                {
                    hThis->Logger(CPrePackagerDlg::currentDateTime() + "download is done\n");
                    // Full path of the downloaded file: current directory + '\' + file name.
                    TCHAR programpath[_MAX_PATH];
                    GetCurrentDirectory(_MAX_PATH, programpath);
                    hThis->m_valOriginFolderPath.Format(_T("%s\\"), programpath);
                    hThis->m_valOriginFolderPath += hThis->m_origFileName;
                }
            }
            else
            {
                hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: " +
                    get_object_outcome.GetError().GetExceptionName() + " " +
                    get_object_outcome.GetError().GetMessage() + "\n");
                hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
            }
        }
    }
    Aws::ShutdownAPI(options);
}
即使是现在,我也不知道为什么它不起作用。
但我将方法更改为如下所示并且有效。
这段代码会把下载到的数据块直接写入磁盘。
所以它不会使用那么多内存。(大约 10~30MB)
// 3. file download from s3
//
// Downloads the S3 object whose key is hThis->m_strTargetPath into a file in
// the current working directory. The file is named after the last non-empty
// '/'-separated component of the key. The request is given a response stream
// factory that returns a file stream, so the response body is backed by that
// file. On success the file name and its full path are stored in hThis; on
// failure the error is logged and runSignal is set to STAT_RUN_STOP.
{
    string strTargetPath = hThis->m_strTargetPath;
    Aws::SDKOptions options;
    options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
    Aws::InitAPI(options);
    {
        // Inner scope: the client, request and outcome are destroyed before
        // Aws::ShutdownAPI runs.
        const char *bucket_name = "nemodax-upload-dev";
        Aws::Client::ClientConfiguration clientConfig;
        clientConfig.region = "ap-northeast-2";
        Aws::S3::S3Client s3_client(clientConfig);
        Aws::S3::Model::GetObjectRequest object_request;
        object_request.WithBucket(bucket_name).WithKey(strTargetPath.c_str());

        // Parse the file name from the path without fixed-size buffers:
        // skip trailing slashes, then take everything after the previous '/'.
        string fileName;
        const string::size_type nameEnd = strTargetPath.find_last_not_of('/');
        if (nameEnd != string::npos)
        {
            const string::size_type slash = strTargetPath.rfind('/', nameEnd);
            const string::size_type nameBegin = (slash == string::npos) ? 0 : slash + 1;
            fileName = strTargetPath.substr(nameBegin, nameEnd - nameBegin + 1);
        }

        if (fileName.empty())
        {
            // Empty key or a key made only of '/': there is nothing to name the file after.
            hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: target path has no file name\n");
            hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
        }
        else
        {
            // While downloading, the stream is saved straight into the file
            // named by fileName, so the download does not use much memory.
            // Only fileName is captured (by copy) so the factory stays valid
            // independently of the surrounding locals.
            object_request.SetResponseStreamFactory(
                [fileName]() {
                    return Aws::New<Aws::FStream>("S3DOWNLOAD", fileName, std::ios_base::out | std::ios_base::binary);
                }
            );
            auto get_object_outcome = s3_client.GetObject(object_request);
            if (get_object_outcome.IsSuccess())
            {
                // Store the name in the member string and point m_origFileName
                // at that copy. Pointing it at a local string's buffer would
                // dangle once the local is destroyed (assuming m_origFileName
                // is a raw pointer - TODO confirm).
                hThis->m_origFileNameString = fileName;
                hThis->m_origFileName = hThis->m_origFileNameString.c_str();
                hThis->Logger(CPrePackagerDlg::currentDateTime() + "file size: " + std::to_string(get_object_outcome.GetResult().GetContentLength()) + "\n");
                hThis->Logger(CPrePackagerDlg::currentDateTime() + "download is done\n");
                // Register the path of the downloaded original file in a member
                // variable; the later encryption step refers to this path.
                // Full path = current directory + '\' + file name.
                TCHAR programpath[_MAX_PATH];
                GetCurrentDirectory(_MAX_PATH, programpath);
                hThis->m_valOriginFolderPath.Format(_T("%s\\"), programpath);
                hThis->m_valOriginFolderPath += hThis->m_origFileName;
            }
            else
            {
                hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: " +
                    get_object_outcome.GetError().GetExceptionName() + " " +
                    get_object_outcome.GetError().GetMessage() + "\n");
                hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
            }
        }
    }
    Aws::ShutdownAPI(options);
}
Even now, I don't know exactly why it doesn't work.
因为在您的初始示例中,它将整个文件存储在内存中。 在您的第二个代码片段中,您通过使用 fstream 作为响应流做了正确的事情。所以它现在立即写入磁盘。