Use boto for gzipping files instead of s3fs
import contextlib
import gzip
import s3fs

AWS_S3 = s3fs.S3FileSystem(anon=False)  # AWS env must be set up correctly

source_file_path = "/tmp/your_file.txt"
s3_file_path = "my-bucket/your_file.txt.gz"

with contextlib.ExitStack() as stack:
    source_file = stack.enter_context(open(source_file_path, mode="rb"))
    destination_file = stack.enter_context(AWS_S3.open(s3_file_path, mode="wb"))
    destination_file_gz = stack.enter_context(gzip.GzipFile(fileobj=destination_file))
    while True:
        chunk = source_file.read(1024)
        if not chunk:
            break
        destination_file_gz.write(chunk)
I'm trying to run something like this in an AWS Lambda function, but it throws an error because it is unable to install the s3fs module. Also, I use boto in the rest of my code, so I would like to stick with boto. How can I use boto for this too?
Basically, I'm opening/reading a file from "/tmp/path", gzipping it, and then saving it to an S3 bucket.
Edit:
import contextlib
import gzip
import io

import boto3

s3_resource = boto3.resource('s3')
bucket = s3_resource.Bucket('testunzipping')
s3_filename = 'samplefile.csv.'
for i in testList:
    # zip_ref.open(i, 'r')
    with contextlib.ExitStack() as stack:
        source_file = stack.enter_context(open(i, mode="rb"))
        destination_file = io.BytesIO()
        destination_file_gz = stack.enter_context(gzip.GzipFile(fileobj=destination_file, mode='wb'))
        while True:
            chunk = source_file.read(1024)
            if not chunk:
                break
            destination_file_gz.write(chunk)
        destination_file_gz.close()  # close the gzip member so the trailer is flushed before uploading
        destination_file.seek(0)
        fileName = i.replace("/tmp/DataPump_10000838/", "")
        bucket.upload_fileobj(destination_file, fileName)
Each item in testList looks like this: "/tmp/your_file.txt"
"AWS Lambda function but it throws an error because it is unable to install the s3fs module"
Other packages and your own library code (reusable code) should go into a Lambda layer.
"How can I use boto for this too?"
import boto3

s3 = boto3.resource("s3")
bucket = s3.Bucket(bucket_name)
Then, if your file is in memory (a file-like object opened in bytes mode, e.g. io.BytesIO or just open(..., 'rb')):
bucket.upload_fileobj(fileobj, s3_filename)
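Putting the pieces together for your use case, here is a minimal sketch that gzips a local file into an in-memory buffer and uploads it with upload_fileobj (the bucket name, paths, and key are illustrative placeholders, not taken from your code):

import gzip
import io

import boto3

# Sketch: gzip a local file into an in-memory buffer, then upload it.
# "my-bucket", the paths and the key are placeholder names.
bucket = boto3.resource("s3").Bucket("my-bucket")
source_file_path = "/tmp/your_file.txt"

buffer = io.BytesIO()
with open(source_file_path, "rb") as source_file, \
        gzip.GzipFile(fileobj=buffer, mode="wb") as gz:
    gz.write(source_file.read())  # for very large files, write in chunks instead

buffer.seek(0)  # the with block has closed the GzipFile and flushed the trailer; rewind before uploading
bucket.upload_fileobj(buffer, "your_file.txt.gz")

Note that the GzipFile must be closed (the with block does this) before seeking and uploading, otherwise the uploaded gzip stream is truncated.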
Or, if you have a file on local disk:
bucket.upload_file(filepath, s3_filename)
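As a sketch of the on-disk variant (again with placeholder names), you can compress into /tmp first and then hand the resulting path to upload_file:

import gzip
import shutil

import boto3

# Sketch: gzip to a temporary file in /tmp, then upload the file by path.
# "my-bucket" and the paths are placeholder names.
bucket = boto3.resource("s3").Bucket("my-bucket")
source_file_path = "/tmp/your_file.txt"
gz_file_path = source_file_path + ".gz"

with open(source_file_path, "rb") as src, gzip.open(gz_file_path, "wb") as dst:
    shutil.copyfileobj(src, dst)  # stream-copy so large files are not read into memory at once

bucket.upload_file(gz_file_path, "your_file.txt.gz")

This variant trades Lambda memory for /tmp disk space, which can be the better fit when the files are large.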