如何解压缩文件,但跳过并记录错误
How do I unzip files en masse but skip and log errors
我有一个包含大约 3000 个 zip 文件的文件夹
我需要将它们分别解压到以各自文件名命名的子文件夹中(类似“解压到此处”的行为)。
我正在使用 7-Zip。
我的问题是其中一些文件受密码保护,而另一些文件已损坏。
我需要能够整体提取所有批次,并记录失败的批次。
7-Zip 的行为是在出错时停止,我找不到解决这个问题的方法。
我想需要某种批处理文件。
我确实看到了这个:
for i in *; do mkdir "$i.extracted"; (cd "$i.extracted" && 7z x "../$i") || echo "Error with $i"; done
但这出自一篇针对 Linux 的帖子,而我需要在 Windows 环境中运行。在命令提示符下运行上面的命令时,只会提示“此时不应有 i”(i was unexpected at this time)。
感谢任何帮助。
伊恩
我用 Python 编写了我的解决方案,因为我觉得这样更容易编写和理解。您需要 Python 3(3.7 或更高版本,因为脚本使用了 subprocess.run 的 capture_output 参数)才能运行此脚本。
import os
import shutil
import sys
import datetime
import glob
import subprocess
# Full path of the 7-Zip command-line executable.
# Change it according to your 7-Zip installation.
PATH_7ZIP = r'C:\Program Files\7-Zip\7z.exe'
# Folder that is searched for the archives to extract.
PATH_ZIPS = r'zips'
# Folder under which one output sub-folder per archive is created.
PATH_OUTF = r'outputs'
# Name of the log file; it is opened in append mode, so runs accumulate.
FILE_LOGS = r'status.log'
def log(msg):
    """Append one timestamped line to the log file named by FILE_LOGS.

    Args:
        msg: Text to record; it is written after the timestamp on one line.
    """
    # Same "YYYY-MM-DD HH:MM:SS.ffffff" layout as the hand-built format.
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    # Explicit UTF-8 with errors="replace": messages carry archive names,
    # and the locale's default encoding may be unable to represent them,
    # which would abort the whole batch with a UnicodeEncodeError.
    with open(FILE_LOGS, 'a', encoding='utf-8', errors='replace') as f:
        f.write("{} {}\n".format(stamp, msg))
def fatal_error(msg, ret):
    """Report an unrecoverable problem and terminate the script.

    The message is printed to stderr and appended to the log before exiting.

    Args:
        msg: Description of the problem.
        ret: Exit status handed to the interpreter.

    Raises:
        SystemExit: Always, carrying ``ret``.
    """
    print("Fatal Error:", msg, file=sys.stderr)
    log("Fatal Error: " + msg)
    # sys.exit() is the supported API; the bare exit() helper is injected by
    # the site module and is missing under "python -S" or in frozen builds.
    sys.exit(ret)
def warning(msg):
    """Write ``msg`` to the log, tagged as a warning."""
    entry = "Warning: " + msg
    log(entry)
def info(msg):
    """Write ``msg`` to the log, tagged as an informational entry."""
    entry = "Info: " + msg
    log(entry)
# The core logic
def extract_zip(z):
    """Extract one archive with 7-Zip and log the outcome.

    A non-zero 7-Zip exit status is logged as a warning together with the
    text 7-Zip wrote to stderr; a zero status is logged as an info entry.

    Args:
        z: Path of the archive to extract.
    """
    # Output goes to PATH_OUTF\<archive file name>.
    target_dir = os.path.join(PATH_OUTF, os.path.basename(z))
    # 7-Zip arguments:
    #   "e"  : extract
    #   z    : the archive to unpack
    #   "-y" : answer yes to every question 7-Zip may ask (e.g. overwrite)
    #   "-p" : pass an empty password so 7-Zip does not prompt for one
    #   "-o" : output folder
    # NOTE(review): per the 7-Zip docs "e" drops the directory names stored
    # in the archive while "x" keeps full paths - confirm which is wanted.
    proc = subprocess.run(
        [PATH_7ZIP, "e", z, "-y", "-p", "-o" + target_dir],
        capture_output=True,
    )
    if proc.returncode != 0:
        # errors="replace": stderr can contain non-ASCII bytes (for example
        # file names); a strict decode would raise UnicodeDecodeError and
        # stop the batch instead of logging the failure and moving on.
        reason = proc.stderr.decode("ascii", errors="replace")
        warning(z + ". " + reason.replace('\r', '').replace('\n', ''))
    else:
        # Success: record the archive path.
        info(z)
def main():
    """Collect the archives found in PATH_ZIPS and extract each in turn."""
    info("Starting main")
    # .zip files first, then .7z; add more patterns here for other formats.
    zips = []
    for pattern in ("*.zip", "*.7z"):
        zips.extend(glob.glob(os.path.join(PATH_ZIPS, pattern)))
    info("Found " + str(len(zips)) + " zips!")
    for archive in zips:
        extract_zip(archive)
    info("End")
# ENTRY POINT: here the program begins
if __name__ == "__main__":
    info("Starting new session")

    # Validate the configured paths before doing any work.
    if not os.path.exists(PATH_7ZIP):
        fatal_error("7z.exe not found!", 2)
    if not os.path.exists(PATH_ZIPS):
        fatal_error("Cannot find zips folder!", 3)

    if os.path.exists(PATH_OUTF):
        # An existing output folder is only deleted when the script is called
        # with the single argument "replace", so earlier results are not
        # wiped by mistake.
        replace_requested = sys.argv[1:] == ['replace']
        if not replace_requested:
            usage_hint = "this script using {} replace".format(sys.argv[0])
            fatal_error("Output dir already exists! Please remove it or call " +
                        usage_hint, 4)
        else:
            info("Deleting previous output folder")
            shutil.rmtree(PATH_OUTF)

    os.makedirs(PATH_OUTF)
    main()
注意:当您再次 运行 脚本时,日志文件不会被覆盖。该脚本只是创建(如有必要)并附加到该文件。
尽管如此,我无法访问我正在使用的服务器上的 Python。
我的解决方案是一个批处理文件:-
for %%i in (*); do c:\progra~1\7-zip\7z.exe x -y "D:\Export\Docs\Zip\Zipout\%%i" -oD:\Export\Docs\Zip\Zipout\%%i
不过我不知道如何记录发生的事情。
我有一个包含大约 3000 个 zip 文件的文件夹。我需要将它们分别解压到以各自文件名命名的子文件夹中(类似“解压到此处”的行为)。我正在使用 7-Zip。
我的问题是其中一些文件受密码保护,而另一些文件已损坏。 我需要能够整体提取所有批次,并记录失败的批次。
7-Zip 的行为是在出错时停止,我找不到解决这个问题的方法。
我想需要某种批处理文件。
我确实看到了这个:
for i in *; do mkdir "$i.extracted"; (cd "$i.extracted" && 7z x "../$i") || echo "Error with $i"; done
但这出自一篇针对 Linux 的帖子,而我需要在 Windows 环境中运行。在命令提示符下运行上面的命令时,只会提示“此时不应有 i”(i was unexpected at this time)。
感谢任何帮助。
伊恩
我用 Python 编写了我的解决方案,因为我觉得这样更容易编写和理解。您需要 Python 3(3.7 或更高版本,因为脚本使用了 subprocess.run 的 capture_output 参数)才能运行此脚本。
import os
import shutil
import sys
import datetime
import glob
import subprocess
# Full path of the 7-Zip command-line executable.
# Change it according to your 7-Zip installation.
PATH_7ZIP = r'C:\Program Files\7-Zip\7z.exe'
# Folder that is searched for the archives to extract.
PATH_ZIPS = r'zips'
# Folder under which one output sub-folder per archive is created.
PATH_OUTF = r'outputs'
# Name of the log file; it is opened in append mode, so runs accumulate.
FILE_LOGS = r'status.log'
def log(msg):
    """Append one timestamped line to the log file named by FILE_LOGS.

    Args:
        msg: Text to record; it is written after the timestamp on one line.
    """
    # Same "YYYY-MM-DD HH:MM:SS.ffffff" layout as the hand-built format.
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    # Explicit UTF-8 with errors="replace": messages carry archive names,
    # and the locale's default encoding may be unable to represent them,
    # which would abort the whole batch with a UnicodeEncodeError.
    with open(FILE_LOGS, 'a', encoding='utf-8', errors='replace') as f:
        f.write("{} {}\n".format(stamp, msg))
def fatal_error(msg, ret):
    """Report an unrecoverable problem and terminate the script.

    The message is printed to stderr and appended to the log before exiting.

    Args:
        msg: Description of the problem.
        ret: Exit status handed to the interpreter.

    Raises:
        SystemExit: Always, carrying ``ret``.
    """
    print("Fatal Error:", msg, file=sys.stderr)
    log("Fatal Error: " + msg)
    # sys.exit() is the supported API; the bare exit() helper is injected by
    # the site module and is missing under "python -S" or in frozen builds.
    sys.exit(ret)
def warning(msg):
    """Write ``msg`` to the log, tagged as a warning."""
    entry = "Warning: " + msg
    log(entry)
def info(msg):
    """Write ``msg`` to the log, tagged as an informational entry."""
    entry = "Info: " + msg
    log(entry)
# The core logic
def extract_zip(z):
    """Extract one archive with 7-Zip and log the outcome.

    A non-zero 7-Zip exit status is logged as a warning together with the
    text 7-Zip wrote to stderr; a zero status is logged as an info entry.

    Args:
        z: Path of the archive to extract.
    """
    # Output goes to PATH_OUTF\<archive file name>.
    target_dir = os.path.join(PATH_OUTF, os.path.basename(z))
    # 7-Zip arguments:
    #   "e"  : extract
    #   z    : the archive to unpack
    #   "-y" : answer yes to every question 7-Zip may ask (e.g. overwrite)
    #   "-p" : pass an empty password so 7-Zip does not prompt for one
    #   "-o" : output folder
    # NOTE(review): per the 7-Zip docs "e" drops the directory names stored
    # in the archive while "x" keeps full paths - confirm which is wanted.
    proc = subprocess.run(
        [PATH_7ZIP, "e", z, "-y", "-p", "-o" + target_dir],
        capture_output=True,
    )
    if proc.returncode != 0:
        # errors="replace": stderr can contain non-ASCII bytes (for example
        # file names); a strict decode would raise UnicodeDecodeError and
        # stop the batch instead of logging the failure and moving on.
        reason = proc.stderr.decode("ascii", errors="replace")
        warning(z + ". " + reason.replace('\r', '').replace('\n', ''))
    else:
        # Success: record the archive path.
        info(z)
def main():
    """Collect the archives found in PATH_ZIPS and extract each in turn."""
    info("Starting main")
    # .zip files first, then .7z; add more patterns here for other formats.
    zips = []
    for pattern in ("*.zip", "*.7z"):
        zips.extend(glob.glob(os.path.join(PATH_ZIPS, pattern)))
    info("Found " + str(len(zips)) + " zips!")
    for archive in zips:
        extract_zip(archive)
    info("End")
# ENTRY POINT: here the program begins
if __name__ == "__main__":
    info("Starting new session")

    # Validate the configured paths before doing any work.
    if not os.path.exists(PATH_7ZIP):
        fatal_error("7z.exe not found!", 2)
    if not os.path.exists(PATH_ZIPS):
        fatal_error("Cannot find zips folder!", 3)

    if os.path.exists(PATH_OUTF):
        # An existing output folder is only deleted when the script is called
        # with the single argument "replace", so earlier results are not
        # wiped by mistake.
        replace_requested = sys.argv[1:] == ['replace']
        if not replace_requested:
            usage_hint = "this script using {} replace".format(sys.argv[0])
            fatal_error("Output dir already exists! Please remove it or call " +
                        usage_hint, 4)
        else:
            info("Deleting previous output folder")
            shutil.rmtree(PATH_OUTF)

    os.makedirs(PATH_OUTF)
    main()
注意:当您再次 运行 脚本时,日志文件不会被覆盖。该脚本只是创建(如有必要)并附加到该文件。
尽管如此,我无法访问我正在使用的服务器上的 Python。
我的解决方案是一个批处理文件:-
for %%i in (*); do c:\progra~1\7-zip\7z.exe x -y "D:\Export\Docs\Zip\Zipout\%%i" -oD:\Export\Docs\Zip\Zipout\%%i
不过我不知道如何记录发生的事情。