无法使用 Python 解压缩 GCS 存储桶中受密码保护的 .gz 文件

Unable to unzip a password-protected .gz file in a GCS bucket using Python

尝试在 GCS 中解压缩受密码保护的文件,但在以下代码中出现错误。下面的代码可以很好地处理普通的 .gz 文件,但无法解压缩受密码保护的文件。

# Question snippet, kept verbatim as the failing example: it downloads a GCS
# object and, if the bytes form a zip archive, re-uploads each member.
# NOTE(review): the indentation below is inconsistent, presumably because the
# lines were copied out of a function body.
storage_client = storage.Client()
source_bucket = 'bucket'
source_bucket1 = storage_client.bucket(source_bucket)
blob = source_bucket1.blob("path/filename.gz")
# Wrap the downloaded bytes in a file-like object for is_zipfile()/ZipFile.
zipbytes = io.BytesIO(blob.download_as_string())
        print(zipbytes)
        # is_zipfile() inspects the content, not the ".gz" extension.
        if is_zipfile(zipbytes):
            with ZipFile(zipbytes, 'r') as myzip:
                for contentfilename in myzip.namelist():
                    # No pwd is passed here, so an encrypted member cannot be
                    # decrypted by this call.
                    contentfile = myzip.read(contentfilename)
                    # Drop the last three characters of the member name.
                    contentfilename = contentfilename[:-3]
                    # NOTE(review): `bucket` is not defined in this snippet;
                    # `source_bucket1` is presumably what was intended.
                    blob1 = bucket.blob(contentfilename)
                    blob1.upload_from_string(contentfile)
                    # NOTE(review): `zipfilename_with_path` is not defined in
                    # this snippet either.
                    print(f'File decompressed from {zipfilename_with_path} to {contentfilename}')
        # As indented, the source object is deleted whether or not it was a
        # zip archive.
        blob.delete()

您可以使用 Python 来完成,例如在 Cloud Function(云函数)中运行以下代码:

from google.cloud import storage
    from zipfile import ZipFile
    from zipfile import is_zipfile
    import io

    def zipextract(bucketname, zipfilename_with_path, pwd=None):
        """Extract a zip archive stored in GCS into objects in the same bucket.

        The archive is downloaded fully into memory, and each member is
        uploaded as ``<zipfilename_with_path>/<member name>``.

        Args:
            bucketname: Name of the bucket that holds the archive.
            zipfilename_with_path: Object name of the archive inside the bucket.
            pwd: Optional password for encrypted members, as ``bytes`` or
                ``str`` (a ``str`` is encoded as UTF-8). Leave as ``None`` for
                unencrypted archives.

        Raises:
            RuntimeError: Raised by ``zipfile`` when a member is encrypted and
                the password is missing or wrong.

        If the downloaded bytes are not a zip archive, nothing is extracted.
        Note that ``zipfile`` can only decrypt traditional ZIP encryption.
        """
        # ZipFile expects the password as bytes.
        if isinstance(pwd, str):
            pwd = pwd.encode("utf-8")

        storage_client = storage.Client()
        bucket = storage_client.get_bucket(bucketname)

        archive_blob = bucket.blob(zipfilename_with_path)
        # download_as_bytes() replaces the deprecated download_as_string().
        zipbytes = io.BytesIO(archive_blob.download_as_bytes())

        if not is_zipfile(zipbytes):
            return

        with ZipFile(zipbytes, 'r') as myzip:
            for contentfilename in myzip.namelist():
                contentfile = myzip.read(contentfilename, pwd=pwd)
                # Use a separate name so the archive blob is not rebound.
                member_blob = bucket.blob(zipfilename_with_path + "/" + contentfilename)
                member_blob.upload_from_string(contentfile)

    # Example call: extracts gs://mybucket/path/file.zip; each member is
    # uploaded under the prefix "path/file.zip/" in the same bucket.
    zipextract("mybucket", "path/file.zip") # if the file is gs://mybucket/path/file.zip

我使用以下逻辑成功读取了受密码保护的 .csv.gz 文件。所有操作都在内存中完成,因此文件很大时会有性能问题,但可以正常工作。

        # Reads one password-protected member of a zip archive stored in GCS
        # and uploads its contents as a new object, entirely in memory.
        # `path` and `destination_file_path` are placeholders that this
        # snippet does not define; supply the archive's object name and the
        # destination suffix respectively.
        storage_client = storage.Client()
        source_bucket = '<bucket-name>'
        source_bucket1 = storage_client.bucket(source_bucket)
        # NOTE(review): the leading "/" becomes part of the object name, and
        # no separator is added before destination_file_path - confirm intent.
        bucket_folder = '/unzip'
        blob = source_bucket1.blob(path)
        # download_as_bytes() replaces the deprecated download_as_string().
        zipbytes = io.BytesIO(blob.download_as_bytes())
        with ZipFile(zipbytes, 'r') as myzip:
            print("Inside the zipfiles loop")
            # pwd must be bytes; zipfile raises RuntimeError on a bad password.
            with myzip.open('filename.csv', pwd=b'password') as myfile:
                print("Inside zip 2 loop")
                contentfile = myfile.read()
                contentfilename = bucket_folder + destination_file_path
                blob1 = source_bucket1.blob(contentfilename)
                blob1.upload_from_string(contentfile)