如何递归提取zip文件?
How to extract zip file recursively?
我有一个 zip 文件,其中包含三个 zip 文件,如下所示:
zipfile.zip\
    dirA.zip\
        a
    dirB.zip\
        b
    dirC.zip\
        c
我想将 zip 文件中的所有内部 zip 文件提取到具有这些名称(dirA、dirB、dirC)的目录中。
基本上,我想以以下架构结束:
output\
    dirA\
        a
    dirB\
        b
    dirC\
        c
我尝试了以下方法:
import os, re
from zipfile import ZipFile
# Fragment from the question: `directory`, `data`, `pattern` and `self` are
# defined elsewhere by the asker and are not shown here.
os.makedirs(directory)  # where directory is "\output"
with ZipFile(self.archive_name, "r") as archive:
    # NOTE(review): `data` appears to map an output sub-directory name to the
    # archive members that belong in it -- confirm against the asker's code.
    for dir_id, files in data.items():
        if not files:
            continue
        print("Creating", dir_id)
        dirpath = os.path.join(directory, dir_id)
        os.mkdir(dirpath)
        for file in files:
            # Match the member currently being processed; the original
            # matched an undefined name `filename` here.
            match = pattern.match(file)
            new = match.group(2)
            new_filename = os.path.join(dirpath, new)
            # Close the member stream promptly instead of leaking it.
            with archive.open(file) as member:
                content = member.read()
            # The member's bytes are written out unchanged, so an inner
            # .zip member ends up on disk still zipped.
            with open(new_filename, "wb") as outfile:
                outfile.write(content)
但它只提取 zip 文件,我最终得到:
output\
    dirA\
        dirA.zip
    dirB\
        dirB.zip
    dirC\
        dirC.zip
如果能提供包含代码片段的建议,我将不胜感激:我已经阅读了文档并尝试了许多不同的方法,但都没有成功。
解压缩 zip 文件时,您可能希望将内部 zip 文件读入内存,而不是将它们写入磁盘。为此,我使用了 BytesIO。
请看下面的代码:
import os
import io
import zipfile
def extract(filename):
    """Extract every inner zip of ``filename`` into a directory named after it.

    Each member of the outer archive whose name ends in ``.zip`` is read into
    memory and unpacked into a directory with the member's name minus its
    extension (``dirA.zip`` -> ``dirA``). Directories are created relative to
    the current working directory. Members that are not zip files are skipped.

    Raises:
        zipfile.BadZipFile: if ``filename`` or an inner ``.zip`` member is not
            a valid zip archive.
    """
    with zipfile.ZipFile(filename) as outer:
        for member in outer.namelist():
            # Only inner archives are unpacked; anything else would fail to
            # open as a zip and leave a stray empty directory behind.
            if not member.lower().endswith(".zip"):
                continue
            # Directory name is the member name without its extension.
            dirname = os.path.splitext(member)[0]
            # Tolerate re-runs and nested member paths.
            os.makedirs(dirname, exist_ok=True)
            # Read the inner zip into a bytes buffer instead of onto disk.
            buffer = io.BytesIO(outer.read(member))
            with zipfile.ZipFile(buffer) as inner:
                inner.extractall(dirname)
如果您运行 extract("zipfile.zip"),并且 zipfile.zip 的内容为:
zipfile.zip/
dirA.zip/
a
dirB.zip/
b
dirC.zip/
c
输出应该是:
dirA/
a
dirB/
b
dirC/
c
下面是一个可以提取嵌套 zip 文件(任意嵌套层级)并清理原始 zip 文件的函数:
import zipfile, re, os
def extract_nested_zip(zippedFile, toFolder):
    """Extract a zip file, including any nested zip files, into ``toFolder``.

    Every archive is deleted once it has been extracted. Nested archives are
    unpacked into the directory in which they are found. Any ``.zip`` file
    already present under ``toFolder`` is unpacked and removed as well.

    Args:
        zippedFile: path of the archive to extract (removed afterwards).
        toFolder: directory that receives the extracted content.

    Raises:
        zipfile.BadZipFile: if any file ending in ``.zip`` is not a valid
            archive.
    """
    def _extract_and_remove(archive_path, destination):
        # Unpack one archive, then delete it so it is never processed twice.
        with zipfile.ZipFile(archive_path, 'r') as zfile:
            zfile.extractall(path=destination)
        os.remove(archive_path)

    _extract_and_remove(zippedFile, toFolder)

    # Sweep the tree repeatedly instead of recursing from inside os.walk:
    # a recursive call would re-walk the same folder and delete sibling
    # archives that the caller's stale file list still refers to.
    found_archive = True
    while found_archive:
        found_archive = False
        for root, _dirs, files in os.walk(toFolder):
            for filename in files:
                if filename.endswith('.zip'):
                    _extract_and_remove(os.path.join(root, filename), root)
                    # Something new was unpacked; sweep again for archives
                    # it may have contained.
                    found_archive = True
我尝试了其他一些解决方案,但无法让它们"就地"(in place)工作,因此在这里贴出我自己的就地解压方案。注意:它会删除 zip 文件,并用同名目录"替换"它们,所以如果您想保留 zip 文件,请先备份。
策略很简单:解压目录(及其子目录)中的所有 zip 文件,然后不断重复,直到不再有 zip 文件为止。之所以需要重复,是因为 zip 文件中可能还包含 zip 文件。
import os
import io
import zipfile
import re
def unzip_directory(directory):
    """Unzip (and then delete) all zip files found under ``directory``.

    Each ``name.zip`` is extracted into a sibling directory ``name`` and the
    archive is removed afterwards. Archives produced by an extraction are
    not guaranteed to be handled in the same call.

    Raises:
        zipfile.BadZipFile: if a file ending in ``.zip`` is not a valid
            archive.
    """
    suffix = '.zip'
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            if not filename.endswith(suffix):
                continue
            zipped_file = os.path.join(root, filename)
            # Strip only the trailing ".zip"; splitting on the first ".zip"
            # would truncate names such as "my.zipped.v1.zip" to "my".
            to_path = os.path.join(root, filename[:-len(suffix)])
            os.makedirs(to_path, exist_ok=True)
            with zipfile.ZipFile(zipped_file, 'r') as zfile:
                zfile.extractall(path=to_path)
            # Delete the archive so later passes do not process it again.
            os.remove(zipped_file)
def exists_zip(directory):
    """Return True if any ``.zip`` file exists under ``directory``, recursively.

    The walk stops at the first match instead of scanning the whole tree.
    A directory that does not exist yields no entries, so the result is
    False.
    """
    return any(
        filename.endswith('.zip')
        for _root, _dirs, files in os.walk(directory)
        for filename in files
    )
def unzip_directory_recursively(directory, max_iter=1000):
    """Call ``unzip_directory`` until no zip files remain under ``directory``.

    Each pass may expose new archives (zips inside zips), so passes repeat
    until ``exists_zip`` reports none, or until ``max_iter`` passes have run.
    Progress and the outcome are printed; nothing is returned.

    Args:
        directory: root directory to unzip in place.
        max_iter: upper bound on the number of passes.
    """
    print("Does the directory path exist? ", os.path.exists(directory))
    iterate = 0
    while exists_zip(directory) and iterate < max_iter:
        unzip_directory(directory)
        iterate += 1
    # A timeout means archives are still left; finishing on exactly the
    # max_iter-th pass is a success, not a timeout.
    timed_out = exists_zip(directory)
    pre = "Did" if timed_out else "Did not "
    print(pre, "time out based on max_iter limit of", max_iter, ". Took iterations:", iterate)
假设您的 zip 文件已备份,只需调用 unzip_directory_recursively(your_directory) 即可完成全部解压。
这对我有用。只需将此脚本与嵌套的 zip 放在同一目录下即可。它会将 zip 解压缩到与原始 zip 同名的目录中,并清理原始 zip。它还将计算嵌套 zip 中的文件总数
import os
from zipfile import ZipFile
def unzip(path, total_count):
    """Unzip every archive under ``path`` in place and count the other files.

    Each ``name.zip`` is extracted into a sibling directory ``name``, the
    archive is deleted, and the extracted directory is processed recursively.

    Args:
        path: directory to process.
        total_count: running count of non-zip files seen so far.

    Returns:
        ``total_count`` plus the number of non-zip files found under
        ``path``, including files extracted from nested archives.
    """
    for root, dirs, files in os.walk(path):
        for file in files:
            file_name = os.path.join(root, file)
            if not file_name.endswith('.zip'):
                total_count += 1
                continue
            # Target directory is the archive path without ".zip".
            currentdir = file_name[:-4]
            os.makedirs(currentdir, exist_ok=True)
            with ZipFile(file_name) as zipObj:
                zipObj.extractall(currentdir)
            os.remove(file_name)
            # The recursive call below counts everything under currentdir.
            # If that directory already existed, os.walk still has it queued
            # in `dirs`; prune it so its files are not counted twice.
            extracted_name = file[:-4]
            if extracted_name in dirs:
                dirs.remove(extracted_name)
            total_count = unzip(currentdir, total_count)
    return total_count
# Unzip everything under the current directory, then report how many
# non-zip files were found.
total_count = unzip(path='.', total_count=0)
print(total_count)
我有一个 zip 文件,其中包含三个 zip 文件,如下所示:
zipfile.zip\
dirA.zip\
a
dirB.zip\
b
dirC.zip\
c
我想将 zip 文件中的所有内部 zip 文件提取到具有这些名称(dirA、dirB、dirC)的目录中。
基本上,我想以以下架构结束:
output\
dirA\
a
dirB\
b
dirC\
c
我尝试了以下方法:
import os, re
from zipfile import ZipFile
# Fragment from the question: `directory`, `data`, `pattern` and `self` are
# defined elsewhere by the asker and are not shown here.
os.makedirs(directory)  # where directory is "\output"
with ZipFile(self.archive_name, "r") as archive:
    # NOTE(review): `data` appears to map an output sub-directory name to the
    # archive members that belong in it -- confirm against the asker's code.
    for dir_id, files in data.items():
        if not files:
            continue
        print("Creating", dir_id)
        dirpath = os.path.join(directory, dir_id)
        os.mkdir(dirpath)
        for file in files:
            # Match the member currently being processed; the original
            # matched an undefined name `filename` here.
            match = pattern.match(file)
            new = match.group(2)
            new_filename = os.path.join(dirpath, new)
            # Close the member stream promptly instead of leaking it.
            with archive.open(file) as member:
                content = member.read()
            # The member's bytes are written out unchanged, so an inner
            # .zip member ends up on disk still zipped.
            with open(new_filename, "wb") as outfile:
                outfile.write(content)
但它只提取 zip 文件,我最终得到:
output\
dirA\
dirA.zip
dirB\
dirB.zip
dirC\
dirC.zip
如果能提供包含代码片段的建议,我将不胜感激:我已经阅读了文档并尝试了许多不同的方法,但都没有成功。
解压缩 zip 文件时,您可能希望将内部 zip 文件读入内存,而不是将它们写入磁盘。为此,我使用了 BytesIO。
请看下面的代码:
import os
import io
import zipfile
def extract(filename):
    """Extract every inner zip of ``filename`` into a directory named after it.

    Each member of the outer archive whose name ends in ``.zip`` is read into
    memory and unpacked into a directory with the member's name minus its
    extension (``dirA.zip`` -> ``dirA``). Directories are created relative to
    the current working directory. Members that are not zip files are skipped.

    Raises:
        zipfile.BadZipFile: if ``filename`` or an inner ``.zip`` member is not
            a valid zip archive.
    """
    with zipfile.ZipFile(filename) as outer:
        for member in outer.namelist():
            # Only inner archives are unpacked; anything else would fail to
            # open as a zip and leave a stray empty directory behind.
            if not member.lower().endswith(".zip"):
                continue
            # Directory name is the member name without its extension.
            dirname = os.path.splitext(member)[0]
            # Tolerate re-runs and nested member paths.
            os.makedirs(dirname, exist_ok=True)
            # Read the inner zip into a bytes buffer instead of onto disk.
            buffer = io.BytesIO(outer.read(member))
            with zipfile.ZipFile(buffer) as inner:
                inner.extractall(dirname)
如果您运行 extract("zipfile.zip"),并且 zipfile.zip 的内容为:
zipfile.zip/
dirA.zip/
a
dirB.zip/
b
dirC.zip/
c
输出应该是:
dirA/
a
dirB/
b
dirC/
c
下面是一个可以提取嵌套 zip 文件(任意嵌套层级)并清理原始 zip 文件的函数:
import zipfile, re, os
def extract_nested_zip(zippedFile, toFolder):
    """Extract a zip file, including any nested zip files, into ``toFolder``.

    Every archive is deleted once it has been extracted. Nested archives are
    unpacked into the directory in which they are found. Any ``.zip`` file
    already present under ``toFolder`` is unpacked and removed as well.

    Args:
        zippedFile: path of the archive to extract (removed afterwards).
        toFolder: directory that receives the extracted content.

    Raises:
        zipfile.BadZipFile: if any file ending in ``.zip`` is not a valid
            archive.
    """
    def _extract_and_remove(archive_path, destination):
        # Unpack one archive, then delete it so it is never processed twice.
        with zipfile.ZipFile(archive_path, 'r') as zfile:
            zfile.extractall(path=destination)
        os.remove(archive_path)

    _extract_and_remove(zippedFile, toFolder)

    # Sweep the tree repeatedly instead of recursing from inside os.walk:
    # a recursive call would re-walk the same folder and delete sibling
    # archives that the caller's stale file list still refers to.
    found_archive = True
    while found_archive:
        found_archive = False
        for root, _dirs, files in os.walk(toFolder):
            for filename in files:
                if filename.endswith('.zip'):
                    _extract_and_remove(os.path.join(root, filename), root)
                    # Something new was unpacked; sweep again for archives
                    # it may have contained.
                    found_archive = True
我尝试了其他一些解决方案,但无法让它们"就地"(in place)工作,因此在这里贴出我自己的就地解压方案。注意:它会删除 zip 文件,并用同名目录"替换"它们,所以如果您想保留 zip 文件,请先备份。
策略很简单:解压目录(及其子目录)中的所有 zip 文件,然后不断重复,直到不再有 zip 文件为止。之所以需要重复,是因为 zip 文件中可能还包含 zip 文件。
import os
import io
import zipfile
import re
def unzip_directory(directory):
    """Unzip (and then delete) all zip files found under ``directory``.

    Each ``name.zip`` is extracted into a sibling directory ``name`` and the
    archive is removed afterwards. Archives produced by an extraction are
    not guaranteed to be handled in the same call.

    Raises:
        zipfile.BadZipFile: if a file ending in ``.zip`` is not a valid
            archive.
    """
    suffix = '.zip'
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            if not filename.endswith(suffix):
                continue
            zipped_file = os.path.join(root, filename)
            # Strip only the trailing ".zip"; splitting on the first ".zip"
            # would truncate names such as "my.zipped.v1.zip" to "my".
            to_path = os.path.join(root, filename[:-len(suffix)])
            os.makedirs(to_path, exist_ok=True)
            with zipfile.ZipFile(zipped_file, 'r') as zfile:
                zfile.extractall(path=to_path)
            # Delete the archive so later passes do not process it again.
            os.remove(zipped_file)
def exists_zip(directory):
    """Return True if any ``.zip`` file exists under ``directory``, recursively.

    The walk stops at the first match instead of scanning the whole tree.
    A directory that does not exist yields no entries, so the result is
    False.
    """
    return any(
        filename.endswith('.zip')
        for _root, _dirs, files in os.walk(directory)
        for filename in files
    )
def unzip_directory_recursively(directory, max_iter=1000):
    """Call ``unzip_directory`` until no zip files remain under ``directory``.

    Each pass may expose new archives (zips inside zips), so passes repeat
    until ``exists_zip`` reports none, or until ``max_iter`` passes have run.
    Progress and the outcome are printed; nothing is returned.

    Args:
        directory: root directory to unzip in place.
        max_iter: upper bound on the number of passes.
    """
    print("Does the directory path exist? ", os.path.exists(directory))
    iterate = 0
    while exists_zip(directory) and iterate < max_iter:
        unzip_directory(directory)
        iterate += 1
    # A timeout means archives are still left; finishing on exactly the
    # max_iter-th pass is a success, not a timeout.
    timed_out = exists_zip(directory)
    pre = "Did" if timed_out else "Did not "
    print(pre, "time out based on max_iter limit of", max_iter, ". Took iterations:", iterate)
假设您的 zip 文件已备份,只需调用 unzip_directory_recursively(your_directory) 即可完成全部解压。
这对我有用。只需将此脚本与嵌套的 zip 放在同一目录下即可。它会将 zip 解压缩到与原始 zip 同名的目录中,并清理原始 zip。它还将计算嵌套 zip 中的文件总数
import os
from zipfile import ZipFile
def unzip(path, total_count):
    """Unzip every archive under ``path`` in place and count the other files.

    Each ``name.zip`` is extracted into a sibling directory ``name``, the
    archive is deleted, and the extracted directory is processed recursively.

    Args:
        path: directory to process.
        total_count: running count of non-zip files seen so far.

    Returns:
        ``total_count`` plus the number of non-zip files found under
        ``path``, including files extracted from nested archives.
    """
    for root, dirs, files in os.walk(path):
        for file in files:
            file_name = os.path.join(root, file)
            if not file_name.endswith('.zip'):
                total_count += 1
                continue
            # Target directory is the archive path without ".zip".
            currentdir = file_name[:-4]
            os.makedirs(currentdir, exist_ok=True)
            with ZipFile(file_name) as zipObj:
                zipObj.extractall(currentdir)
            os.remove(file_name)
            # The recursive call below counts everything under currentdir.
            # If that directory already existed, os.walk still has it queued
            # in `dirs`; prune it so its files are not counted twice.
            extracted_name = file[:-4]
            if extracted_name in dirs:
                dirs.remove(extracted_name)
            total_count = unzip(currentdir, total_count)
    return total_count
# Unzip everything under the current directory, then report how many
# non-zip files were found.
total_count = unzip(path='.', total_count=0)
print(total_count)