从多个文件字节流在内存中创建一个 tar 流
Creating a tar stream in memory from multiple file byte streams
我正在尝试在内存中创建一个 tar 流,向其中添加文件,然后将其保存到 S3。但是存在一些问题,tar 中的文件大小为零。有人能给点建议吗?下面是代码片段——
def tar_and_upload(bucket, keys, dest_bucket):
    """Fetch the given S3 objects, pack them into an in-memory .tar.gz,
    and upload the archive to *dest_bucket*.

    Args:
        bucket: Source bucket name.
        keys: Iterable of object keys to include; only the basename
            (text after the last '/') is used as the member name.
        dest_bucket: Bucket that receives the resulting .tar.gz object.
    """
    s3 = boto3.client('s3')
    file_obj = io.BytesIO()
    tar_file_obj = tarfile.open(mode="w:gz", fileobj=file_obj)
    for key in keys:
        obj = s3.get_object(Bucket=bucket, Key=key)
        _bytes = obj["Body"].read()
        _file_name = key.split("/")[-1]
        # BUG FIX: TarInfo.size defaults to 0, so members came out empty.
        # The size must be set explicitly, and addfile() needs a file-like
        # object to read the member data from — not a raw bytes value.
        info = tarfile.TarInfo(_file_name)
        info.size = len(_bytes)
        tar_file_obj.addfile(info, io.BytesIO(_bytes))
    tar_file_obj.close()
    try:
        obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
        s3.put_object(Body=file_obj.getvalue(), Bucket=dest_bucket, Key=obj_name)
    except Exception:
        # Best-effort upload: log with traceback rather than crash the caller.
        logging.error("Can't save tar to S3", exc_info=True)
    return
显然,在将字节流添加到 tar 时,我们需要明确指定大小。
示例代码-
import tarfile
import uuid
import io
import os
def tar_and_upload():
file_obj = io.BytesIO()
tar_file_obj = tarfile.open(mode = "w:gz", fileobj=file_obj)
for filename in os.listdir("images"):
print(filename)
file_path = os.path.join("images", filename)
#tar_file_obj.add(file_path)
with open(file_path, "rb") as f:
_bytes = f.read()
tar_info = tarfile.TarInfo(filename)
tar_info.size = len(_bytes)
tar_file_obj.addfile(tar_info, io.BytesIO(_bytes))
tar_file_obj.close()
try:
obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
object_path = os.path.join("temp", obj_name)
with open(object_path, "wb") as f:
f.write(file_obj.getvalue())
print(obj_name)
except Exception as e:
print(str(e))
if __name__ == "__main__":
tar_and_upload()
def tar_and_upload(bucket, keys, dest_bucket):
    """Download each S3 object in *keys* from *bucket*, pack them into an
    in-memory .tar.gz, and upload the archive to *dest_bucket*.

    Args:
        bucket: Source bucket name.
        keys: Iterable of object keys; only the basename (text after the
            last '/') is used as the member name inside the archive.
        dest_bucket: Destination bucket for the generated .tar.gz.
    """
    s3 = boto3.client('s3')
    file_obj = io.BytesIO()
    tar_file_obj = tarfile.open(mode="w:gz", fileobj=file_obj)
    for key in keys:
        obj = s3.get_object(Bucket=bucket, Key=key)
        _bytes = obj["Body"].read()
        info = tarfile.TarInfo(key.split("/")[-1])
        # addfile() writes exactly info.size bytes; TarInfo defaults to 0,
        # which would produce empty members, so set the size explicitly.
        info.size = obj["ContentLength"]
        # LastModified is already present in the GetObject response; the
        # previous code issued a redundant HeadObject round-trip per key.
        info.mtime = obj["LastModified"].timestamp()
        tar_file_obj.addfile(info, io.BytesIO(_bytes))
    tar_file_obj.close()
    try:
        obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
        s3.put_object(Body=file_obj.getvalue(), Bucket=dest_bucket, Key=obj_name)
    except Exception:
        # Best-effort upload: log with traceback rather than crash the caller.
        logging.error("Can't save tar to S3", exc_info=True)
    return
对于其他人,希望对 s3 对象执行相同的操作
我正在尝试在内存中创建一个 tar 流,向其中添加文件,然后将其保存到 S3。但是存在一些问题,tar 中的文件大小为零。有人能给点建议吗?下面是代码片段——
def tar_and_upload(bucket, keys, dest_bucket):
    """Fetch the given S3 objects, pack them into an in-memory .tar.gz,
    and upload the archive to *dest_bucket*.

    Args:
        bucket: Source bucket name.
        keys: Iterable of object keys to include; only the basename
            (text after the last '/') is used as the member name.
        dest_bucket: Bucket that receives the resulting .tar.gz object.
    """
    s3 = boto3.client('s3')
    file_obj = io.BytesIO()
    tar_file_obj = tarfile.open(mode="w:gz", fileobj=file_obj)
    for key in keys:
        obj = s3.get_object(Bucket=bucket, Key=key)
        _bytes = obj["Body"].read()
        _file_name = key.split("/")[-1]
        # BUG FIX: TarInfo.size defaults to 0, so members came out empty.
        # The size must be set explicitly, and addfile() needs a file-like
        # object to read the member data from — not a raw bytes value.
        info = tarfile.TarInfo(_file_name)
        info.size = len(_bytes)
        tar_file_obj.addfile(info, io.BytesIO(_bytes))
    tar_file_obj.close()
    try:
        obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
        s3.put_object(Body=file_obj.getvalue(), Bucket=dest_bucket, Key=obj_name)
    except Exception:
        # Best-effort upload: log with traceback rather than crash the caller.
        logging.error("Can't save tar to S3", exc_info=True)
    return
显然,在将字节流添加到 tar 时,我们需要明确指定大小。 示例代码-
import tarfile
import uuid
import io
import os
def tar_and_upload():
file_obj = io.BytesIO()
tar_file_obj = tarfile.open(mode = "w:gz", fileobj=file_obj)
for filename in os.listdir("images"):
print(filename)
file_path = os.path.join("images", filename)
#tar_file_obj.add(file_path)
with open(file_path, "rb") as f:
_bytes = f.read()
tar_info = tarfile.TarInfo(filename)
tar_info.size = len(_bytes)
tar_file_obj.addfile(tar_info, io.BytesIO(_bytes))
tar_file_obj.close()
try:
obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
object_path = os.path.join("temp", obj_name)
with open(object_path, "wb") as f:
f.write(file_obj.getvalue())
print(obj_name)
except Exception as e:
print(str(e))
if __name__ == "__main__":
tar_and_upload()
def tar_and_upload(bucket, keys, dest_bucket):
    """Download each S3 object in *keys* from *bucket*, pack them into an
    in-memory .tar.gz, and upload the archive to *dest_bucket*.

    Args:
        bucket: Source bucket name.
        keys: Iterable of object keys; only the basename (text after the
            last '/') is used as the member name inside the archive.
        dest_bucket: Destination bucket for the generated .tar.gz.
    """
    s3 = boto3.client('s3')
    file_obj = io.BytesIO()
    tar_file_obj = tarfile.open(mode="w:gz", fileobj=file_obj)
    for key in keys:
        obj = s3.get_object(Bucket=bucket, Key=key)
        _bytes = obj["Body"].read()
        info = tarfile.TarInfo(key.split("/")[-1])
        # addfile() writes exactly info.size bytes; TarInfo defaults to 0,
        # which would produce empty members, so set the size explicitly.
        info.size = obj["ContentLength"]
        # LastModified is already present in the GetObject response; the
        # previous code issued a redundant HeadObject round-trip per key.
        info.mtime = obj["LastModified"].timestamp()
        tar_file_obj.addfile(info, io.BytesIO(_bytes))
    tar_file_obj.close()
    try:
        obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
        s3.put_object(Body=file_obj.getvalue(), Bucket=dest_bucket, Key=obj_name)
    except Exception:
        # Best-effort upload: log with traceback rather than crash the caller.
        logging.error("Can't save tar to S3", exc_info=True)
    return
对于其他人,希望对 s3 对象执行相同的操作