Amazon S3 upload fails using boto + Python
Hi, I am unable to upload a file to S3 using boto. It fails with the error message below. Can someone help me? I am new to Python and boto.
from boto.s3 import connect_to_region
from boto.s3.connection import Location
from boto.s3.key import Key
import boto
import gzip
import os

AWS_KEY = ''
AWS_SECRET_KEY = ''
BUCKET_NAME = 'mybucketname'

conn = connect_to_region(Location.USWest2,
                         aws_access_key_id=AWS_KEY,
                         aws_secret_access_key=AWS_SECRET_KEY,
                         is_secure=False, debug=2)

bucket = conn.lookup(BUCKET_NAME)
bucket2 = conn.lookup('unzipped-data')
rs = bucket.list()
rs2 = bucket2.list()
compressed_files = []
all_files = []
files_to_download = []
downloaded_files = []
path = "~/tmp/"

# Check if the file has already been decompressed
def filecheck():
    for filename in bucket.list():
        all_files.append(filename.name)
    for n in rs2:
        compressed_files.append(n.name)
    for file_name in all_files:
        if file_name.strip('.gz') in compressed_files:
            pass
        elif '.gz' in file_name and 'indeed' in file_name:
            files_to_download.append(file_name)

# Download necessary files
def download_files():
    for name in rs:
        if name.name in files_to_download:
            file_name = name.name.split('/')
            print('Downloading: ' + name.name).strip('\n')
            name.get_contents_to_filename(path + file_name[-1])
            print(' - Completed')
            # Decompressing the file
            print('Decompressing: ' + name.name).strip('\n')
            inF = gzip.open(path + file_name[-1], 'rb')
            outF = open(path + file_name[-1].strip('.gz'), 'wb')
            for line in inF:
                outF.write(line)
            inF.close()
            outF.close()
            print(' - Completed')
            # Uploading file
            print('Uploading: ' + name.name).strip('\n')
            full_key_name = name.name.strip('.gz')
            k = Key(bucket2)
            k.key = full_key_name
            k.set_contents_from_filename(path + file_name[-1].strip('.gz'))
            print('Completed')

# Clean Up
d_list = os.listdir(path)
for d in d_list:
    os.remove(path + d)

# Function Calls
filecheck()
download_files()
Error message:
Traceback (most recent call last):
  File "C:\Users\Siddartha.Reddy\workspace\boto-test\com\salesify\sid\decompress_s3.py", line 86, in <module>
    download_files()
  File "C:\Users\Siddartha.Reddy\workspace\boto-test\com\salesify\sid\decompress_s3.py", line 75, in download_files
    k.set_contents_from_filename(path+file_name[-1].strip('.gz'))
  File "C:\Python27\lib\site-packages\boto\s3\key.py", line 1362, in set_contents_from_filename
    encrypt_key=encrypt_key)
  File "C:\Python27\lib\site-packages\boto\s3\key.py", line 1293, in set_contents_from_file
    chunked_transfer=chunked_transfer, size=size)
  File "C:\Python27\lib\site-packages\boto\s3\key.py", line 750, in send_file
    chunked_transfer=chunked_transfer, size=size)
  File "C:\Python27\lib\site-packages\boto\s3\key.py", line 951, in _send_file_internal
    query_args=query_args
  File "C:\Python27\lib\site-packages\boto\s3\connection.py", line 664, in make_request
    retry_handler=retry_handler
  File "C:\Python27\lib\site-packages\boto\connection.py", line 1070, in make_request
    retry_handler=retry_handler)
  File "C:\Python27\lib\site-packages\boto\connection.py", line 1029, in _mexe
    raise ex
socket.error: [Errno 10053] An established connection was aborted by the software in your host machine
I have no problem downloading the files, but for some strange reason the upload fails.
If the problem is the file size (> 5 GB), you should use a multipart upload:
http://docs.aws.amazon.com/AmazonS3/latest/dev/mpuoverview.html
Search the docs for multipart_upload:
http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.multipart
Also, see this question for a related issue:
How can I copy files bigger than 5 GB in Amazon S3?
The process is slightly non-intuitive. You need to (see the sketch after this list):
- run initiate_multipart_upload(), storing the returned object
- break the file into chunks (on disk, or read from memory using CStringIO)
- feed the parts sequentially into upload_part_from_file()
- run complete_upload() on the stored object
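
Here is a minimal sketch of those four steps, assuming boto 2.x on Python 2 (as in the traceback) and reusing the region/credentials setup from the question; the chunk size, local file name, and key name are placeholders, not values from the original post:

import math
from cStringIO import StringIO

from boto.s3 import connect_to_region
from boto.s3.connection import Location

AWS_KEY = ''                    # fill in, as in the question
AWS_SECRET_KEY = ''
CHUNK_SIZE = 50 * 1024 * 1024   # every part except the last must be >= 5 MB
local_path = 'big_file.gz'      # hypothetical local file to upload
key_name = 'big_file.gz'        # hypothetical destination key

conn = connect_to_region(Location.USWest2,
                         aws_access_key_id=AWS_KEY,
                         aws_secret_access_key=AWS_SECRET_KEY)
bucket = conn.lookup('mybucketname')

# 1) Initiate the upload and keep the returned MultiPartUpload object.
mp = bucket.initiate_multipart_upload(key_name)
try:
    part_num = 0
    with open(local_path, 'rb') as f:
        while True:
            chunk = f.read(CHUNK_SIZE)   # 2) slice the file into chunks
            if not chunk:
                break
            part_num += 1
            # 3) Feed the parts, in order, to upload_part_from_file().
            mp.upload_part_from_file(StringIO(chunk), part_num)
    mp.complete_upload()                 # 4) finish on the stored object
except Exception:
    mp.cancel_upload()                   # drop any parts S3 kept so far
    raise

Parts are buffered through cStringIO here, as the list above suggests; for very large files you could instead seek within the file on disk and pass size= to upload_part_from_file() to avoid holding a whole chunk in memory.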