分段上传到 Amazon Glacier:Content-Range 与 Content-Length 不兼容
Multipart upload to Amazon Glacier: Content-Range incompatible with Content-Length
我正在尝试将大约 1gb 的文件上传到 Amazon Glacier。有点武断,我决定把它分成32mb的部分并连续上传。
import math
import boto3
from botocore.utils import calculate_tree_hash
client = boto3.client('glacier')
vault_name = 'my-vault'
size = 1073745600 # in bytes
size_mb = size / (2**20) # Convert to megabytes for readability
local_file = 'filename'
multi_up = client.initiate_multipart_upload(vaultName=vault_name,
archiveDescription=local_file,
partSize=str(2**25)) # 32 mb in bytes
parts = math.floor(size_mb / 32)
with open("/Users/alexchase/Desktop/{}".format(local_file), 'rb') as upload:
for p in range(parts):
# Calculate lower and upper bounds for the byte ranges. The last range
# is bigger than the ones that come before.
lower = (p * (2**25))
upper = (((p + 1) * (2**25)) - 1) if (p + 1 < parts) else (size)
up_part = client.upload_multipart_part(vaultName=vault_name,
uploadId=multi_up['uploadId'],
range='bytes {}-{}/*'.format(lower, upper),
body=upload)
checksum = calculate_tree_hash(upload)
complete_up = client.complete_multipart_upload(archiveSize=str(size),
checksum=checksum,
uploadId=multi_up['uploadId'],
vaultName=vault_name)
这会生成有关第一个字节范围的错误。
---------------------------------------------------------------------------
InvalidParameterValueException Traceback (most recent call last)
<ipython-input-2-9dd3ac986601> in <module>()
93 uploadId=multi_up['uploadId'],
94 range='bytes {}-{}/*'.format(lower, upper),
---> 95 body=upload)
96 upload_info.append(up_part)
97 checksum = calculate_tree_hash(upload)
~/anaconda/lib/python3.5/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
251 "%s() only accepts keyword arguments." % py_operation_name)
252 # The "self" in this scope is referring to the BaseClient.
--> 253 return self._make_api_call(operation_name, kwargs)
254
255 _api_call.__name__ = str(py_operation_name)
~/anaconda/lib/python3.5/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
555 error_code = parsed_response.get("Error", {}).get("Code")
556 error_class = self.exceptions.from_code(error_code)
--> 557 raise error_class(parsed_response, operation_name)
558 else:
559 return parsed_response
InvalidParameterValueException: An error occurred (InvalidParameterValueException) when calling the UploadMultipartPart operation:
Content-Range: bytes 0-33554431/* is incompatible with Content-Length: 1073745600
谁能看出我做错了什么?
Content-Range: bytes 0-33554431/* is incompatible with Content-Length: 1073745600
您告诉 API 您正在发送前 32 MiB,但实际上您正在发送(建议发送)整个文件,因为 body=upload
和 upload
不只是第一部分,它是整个文件。 Content-Length
指的是这部分上传的大小,应该是33554432(32MiB)。
docs 确实是模棱两可的...
body
(bytes or seekable file-like object) -- The data to upload.
...但是 "data to upload" 似乎只引用这部分的数据,尽管 "seekable."
这个词
@Michael-sqlbot 非常正确,Content-Range
的问题是我传递的是整个文件而不是一部分。我使用 read()
方法解决了这个问题,但后来我发现了一个单独的问题,即(根据 docs),最后一部分必须与前面的部分大小相同或更小。这意味着使用 math.ceil()
而不是 math.floor()
来定义零件数。
工作代码是:
import math
import boto3
from botocore.utils import calculate_tree_hash
client = boto3.client('glacier')
vault_name = 'my-vault'
size = 1073745600 # in bytes
size_mb = size / (2**20) # Convert to megabytes for readability
local_file = 'filename'
partSize=(2**25)
multi_up = client.initiate_multipart_upload(vaultName=vault_name,
archiveDescription=local_file,
partSize=str(partSize)) # 32 mb in bytes
parts = math.ceil(size_mb / 32) # The number of <=32mb parts we need
with open("/Users/alexchase/Desktop/{}".format(local_file), 'rb') as upload:
for p in range(parts):
# Calculate lower and upper bounds for the byte ranges. The last range
# is now smaller than the ones that come before.
lower = (p * (partSize))
upper = (((p + 1) * (partSize)) - 1) if (p + 1 < parts) else (size-1)
read_size = upper-lower+1
file_part = upload.read(read_size)
up_part = client.upload_multipart_part(vaultName=vault_name,
uploadId=multi_up['uploadId'],
range='bytes {}-{}/*'.format(lower, upper),
body=file_part)
checksum = calculate_tree_hash(upload)
complete_up = client.complete_multipart_upload(archiveSize=str(size),
checksum=checksum,
uploadId=multi_up['uploadId'],
vaultName=vault_name)
由于 Alex 的后续回答声称它 "works",我正在 posting 另一个适用于 Python 3.5 和 Ubuntu 16.04 的版本。我还从我们的生产端到端解决方案中添加了一些环境变量。
原来的 post 给了我一个错误,所以我对其进行了调整并进行了一些清理。希望这可以帮助需要此 Glacier 功能的人。使用带有 awscli 命令的 Shell 脚本不是那么干净。
import math
import boto3
import os
from botocore.utils import calculate_tree_hash
vault_name = os.getenv('GLACIER_VAULT_NAME')
file_name = os.getenv('GLACIER_UPLOAD_FILE')
if vault_name is None:
print('GLACIER_VAULT_NAME environment variable is required. Exiting.')
exit(1)
if file_name is None:
print('GLACIER_UPLOAD_FILE environment variable is required. Exiting.')
exit(2)
chunk_size = 2 ** 25
client = boto3.client('glacier')
client.create_vault(vaultName=vault_name)
upload_obj = client.initiate_multipart_upload(vaultName=vault_name,
archiveDescription=file_name,
partSize=str(chunk_size))
file_size = os.path.getsize(file_name)
parts = math.ceil(file_size / chunk_size)
with open(file_name, 'rb') as upload:
for p in range(parts):
lower = p * chunk_size
upper = lower + chunk_size - 1
if upper > file_size:
upper = (file_size - lower) + lower - 1
file_part = upload.read(chunk_size)
up_part = client.upload_multipart_part(vaultName=vault_name,
uploadId=upload_obj['uploadId'],
range='bytes {}-{}/{}'.format(lower,
upper,
file_size),
body=file_part)
# this needs a new file handler because calculate_tree_hash() processes
# the handler in a similar way to the loop above
checksum = calculate_tree_hash(open(file_name, 'rb'))
complete_up = client.complete_multipart_upload(vaultName=vault_name,
uploadId=upload_obj['uploadId'],
archiveSize=str(file_size),
checksum=checksum)
print(complete_up)
我正在尝试将大约 1gb 的文件上传到 Amazon Glacier。有点武断,我决定把它分成32mb的部分并连续上传。
import math
import boto3
from botocore.utils import calculate_tree_hash
client = boto3.client('glacier')
vault_name = 'my-vault'
size = 1073745600 # in bytes
size_mb = size / (2**20) # Convert to megabytes for readability
local_file = 'filename'
multi_up = client.initiate_multipart_upload(vaultName=vault_name,
archiveDescription=local_file,
partSize=str(2**25)) # 32 mb in bytes
parts = math.floor(size_mb / 32)
with open("/Users/alexchase/Desktop/{}".format(local_file), 'rb') as upload:
for p in range(parts):
# Calculate lower and upper bounds for the byte ranges. The last range
# is bigger than the ones that come before.
lower = (p * (2**25))
upper = (((p + 1) * (2**25)) - 1) if (p + 1 < parts) else (size)
up_part = client.upload_multipart_part(vaultName=vault_name,
uploadId=multi_up['uploadId'],
range='bytes {}-{}/*'.format(lower, upper),
body=upload)
checksum = calculate_tree_hash(upload)
complete_up = client.complete_multipart_upload(archiveSize=str(size),
checksum=checksum,
uploadId=multi_up['uploadId'],
vaultName=vault_name)
这会生成有关第一个字节范围的错误。
---------------------------------------------------------------------------
InvalidParameterValueException Traceback (most recent call last)
<ipython-input-2-9dd3ac986601> in <module>()
93 uploadId=multi_up['uploadId'],
94 range='bytes {}-{}/*'.format(lower, upper),
---> 95 body=upload)
96 upload_info.append(up_part)
97 checksum = calculate_tree_hash(upload)
~/anaconda/lib/python3.5/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
251 "%s() only accepts keyword arguments." % py_operation_name)
252 # The "self" in this scope is referring to the BaseClient.
--> 253 return self._make_api_call(operation_name, kwargs)
254
255 _api_call.__name__ = str(py_operation_name)
~/anaconda/lib/python3.5/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
555 error_code = parsed_response.get("Error", {}).get("Code")
556 error_class = self.exceptions.from_code(error_code)
--> 557 raise error_class(parsed_response, operation_name)
558 else:
559 return parsed_response
InvalidParameterValueException: An error occurred (InvalidParameterValueException) when calling the UploadMultipartPart operation:
Content-Range: bytes 0-33554431/* is incompatible with Content-Length: 1073745600
谁能看出我做错了什么?
Content-Range: bytes 0-33554431/* is incompatible with Content-Length: 1073745600
您告诉 API 您正在发送前 32 MiB,但实际上您正在发送(建议发送)整个文件,因为 body=upload
和 upload
不只是第一部分,它是整个文件。 Content-Length
指的是这部分上传的大小,应该是33554432(32MiB)。
docs 确实是模棱两可的...
body
(bytes or seekable file-like object) -- The data to upload.
...但是 "data to upload" 似乎只引用这部分的数据,尽管 "seekable."
这个词@Michael-sqlbot 非常正确,Content-Range
的问题是我传递的是整个文件而不是一部分。我使用 read()
方法解决了这个问题,但后来我发现了一个单独的问题,即(根据 docs),最后一部分必须与前面的部分大小相同或更小。这意味着使用 math.ceil()
而不是 math.floor()
来定义零件数。
工作代码是:
import math
import boto3
from botocore.utils import calculate_tree_hash
client = boto3.client('glacier')
vault_name = 'my-vault'
size = 1073745600 # in bytes
size_mb = size / (2**20) # Convert to megabytes for readability
local_file = 'filename'
partSize=(2**25)
multi_up = client.initiate_multipart_upload(vaultName=vault_name,
archiveDescription=local_file,
partSize=str(partSize)) # 32 mb in bytes
parts = math.ceil(size_mb / 32) # The number of <=32mb parts we need
with open("/Users/alexchase/Desktop/{}".format(local_file), 'rb') as upload:
for p in range(parts):
# Calculate lower and upper bounds for the byte ranges. The last range
# is now smaller than the ones that come before.
lower = (p * (partSize))
upper = (((p + 1) * (partSize)) - 1) if (p + 1 < parts) else (size-1)
read_size = upper-lower+1
file_part = upload.read(read_size)
up_part = client.upload_multipart_part(vaultName=vault_name,
uploadId=multi_up['uploadId'],
range='bytes {}-{}/*'.format(lower, upper),
body=file_part)
checksum = calculate_tree_hash(upload)
complete_up = client.complete_multipart_upload(archiveSize=str(size),
checksum=checksum,
uploadId=multi_up['uploadId'],
vaultName=vault_name)
由于 Alex 的后续回答声称它 "works",我正在 posting 另一个适用于 Python 3.5 和 Ubuntu 16.04 的版本。我还从我们的生产端到端解决方案中添加了一些环境变量。
原来的 post 给了我一个错误,所以我对其进行了调整并进行了一些清理。希望这可以帮助需要此 Glacier 功能的人。使用带有 awscli 命令的 Shell 脚本不是那么干净。
import math
import boto3
import os
from botocore.utils import calculate_tree_hash
vault_name = os.getenv('GLACIER_VAULT_NAME')
file_name = os.getenv('GLACIER_UPLOAD_FILE')
if vault_name is None:
print('GLACIER_VAULT_NAME environment variable is required. Exiting.')
exit(1)
if file_name is None:
print('GLACIER_UPLOAD_FILE environment variable is required. Exiting.')
exit(2)
chunk_size = 2 ** 25
client = boto3.client('glacier')
client.create_vault(vaultName=vault_name)
upload_obj = client.initiate_multipart_upload(vaultName=vault_name,
archiveDescription=file_name,
partSize=str(chunk_size))
file_size = os.path.getsize(file_name)
parts = math.ceil(file_size / chunk_size)
with open(file_name, 'rb') as upload:
for p in range(parts):
lower = p * chunk_size
upper = lower + chunk_size - 1
if upper > file_size:
upper = (file_size - lower) + lower - 1
file_part = upload.read(chunk_size)
up_part = client.upload_multipart_part(vaultName=vault_name,
uploadId=upload_obj['uploadId'],
range='bytes {}-{}/{}'.format(lower,
upper,
file_size),
body=file_part)
# this needs a new file handler because calculate_tree_hash() processes
# the handler in a similar way to the loop above
checksum = calculate_tree_hash(open(file_name, 'rb'))
complete_up = client.complete_multipart_upload(vaultName=vault_name,
uploadId=upload_obj['uploadId'],
archiveSize=str(file_size),
checksum=checksum)
print(complete_up)