[Python]比较两个 zip 文件的函数,一个位于 FTP 目录,另一个在我的本地机器上
[Python]Function that compares two zip files, one located in FTP dir, the other on my local machine
我在编写一个比较两个 zip 文件的函数时遇到了问题(要判断它们的内容是否相同,而不仅仅是文件名相同)。这是我的代码示例:
def validate_zip_files(self):
    """Download the remote archive over FTP and compare it with the local copy.

    Connects to the hard-coded FTP server, changes into the ``test``
    directory, retrieves the remote file ``test`` into ``test.zip`` in the
    current working directory and compares that download byte-for-byte with
    ``/home/user/file/text.zip``.

    Returns:
        True if both files have identical contents, False otherwise.
    """
    host = '192.168.0.1'
    port = 2323
    username = '123'
    password = '123'
    downloaded_path = 'test.zip'
    local_path = '/home/user/file/text.zip'

    ftp = FTP()
    try:
        ftp.connect(host, port)
        ftp.login(username, password)
        ftp.cwd('test')
        print(ftp.pwd())
        # Close the download target before comparing so every byte is
        # flushed to disk.
        with open(downloaded_path, 'wb') as download:
            ftp.retrbinary('RETR test', download.write)
    finally:
        # Release the connection even when the transfer fails.
        ftp.close()

    # filecmp.cmp() takes paths, not file objects. The files must not be
    # reopened in 'wb' mode here: that would truncate both of them.
    # shallow=False forces a content comparison instead of relying on
    # os.stat() signatures.
    return filecmp.cmp(downloaded_path, local_path, shallow=False)
其中一个问题是,第二个 zip 文件位于另一个位置('/home/user/file/text.zip'),而我把 zip 文件下载到了 Python 脚本所在的目录中。另外,我也不能 100% 确定 filecmp.cmp 是否适用于 .zip 文件。
任何想法都会很棒 :) 谢谢。
我不会直接比较文件,而是比较两个文件的哈希值。这样就不必依赖 filecmp——正如您所说,它可能不适用于压缩文件。
import hashlib
def compare_files(a, b):
    """Return True when the files at paths *a* and *b* have equal SHA-256 digests.

    Args:
        a: Path of the first file.
        b: Path of the second file.

    Returns:
        True if both digests match, False otherwise.
    """
    def _sha256_of(path, chunk_size=65536):
        # Hash in fixed-size chunks so a large archive is never held in
        # memory at once; the ``with`` block closes the handle promptly.
        digest = hashlib.sha256()
        with open(path, 'rb') as stream:
            for chunk in iter(lambda: stream.read(chunk_size), b''):
                digest.update(chunk)
        return digest.digest()

    return _sha256_of(a) == _sha256_of(b)
请查看我的 gist:它会比较两个 zip 文件的内容,并生成从一个 zip 到另一个 zip 的补丁文件。例如,如果两个 zip 文件都包含某个条目但其内容不同,该 gist 能够发现它;如果两者包含的条目不同,它也能发现。该 gist 会忽略修改时间的差异。不过,如果您只关心浅层比较,那么 hashlib 可能是更好的选择。
gist 中的代码如下,供您参考:
import os
import argparse
import collections
import tempfile
import zipfile
import filecmp
import shutil
import shlex
# Result of comparing the entry names of an old and a new archive:
#   to_rm  - names found only in the old archive
#   to_cmp - names found in both archives
#   to_add - names found only in the new archive
ZipCmpResult = collections.namedtuple(
    'ZipCmpResult', 'to_rm to_cmp to_add')
def make_parser():
    """Build the command-line parser for the patch generator.

    Returns:
        An ``argparse.ArgumentParser`` accepting ``--oldfile``, ``--newfile``
        and ``--toname``, each defaulting to a path inside ``share``.
    """
    cli = argparse.ArgumentParser(
        description='Make patch zip file from two similar zip files.')
    # All three options share one help template; only the default's last
    # path component differs.
    for flag, leaf in (('--oldfile', 'old.zip'),
                       ('--newfile', 'new.zip'),
                       ('--toname', 'patch')):
        cli.add_argument(
            flag,
            default=os.path.join('share', leaf),
            help='default: %(default)s')
    return cli
def zipcmp(old, new):
    """Classify the entry names of two zip archives.

    Args:
        old: Old archive, passed to ``zipfile.ZipFile``.
        new: New archive, passed to ``zipfile.ZipFile``.

    Returns:
        A ``ZipCmpResult`` of three sets of entry names: those only in
        *old* (``to_rm``), those in both (``to_cmp``) and those only in
        *new* (``to_add``).
    """
    def _entry_names(archive):
        # Collect the archive's member names as a set.
        with zipfile.ZipFile(archive) as zf:
            return set(zf.namelist())

    before = _entry_names(old)
    after = _entry_names(new)
    return ZipCmpResult(
        before.difference(after),
        before.intersection(after),
        after.difference(before))
def compare_files(old, new, cmpresult):
    """Mark shared entries whose contents differ as needing to be patched.

    Every name in ``cmpresult.to_cmp`` is extracted from both archives into
    a temporary directory and the two extracted files are compared byte by
    byte. Names whose contents differ are added to ``cmpresult.to_add``,
    which is mutated in place.

    Args:
        old: Old archive, passed to ``zipfile.ZipFile``.
        new: New archive, passed to ``zipfile.ZipFile``.
        cmpresult: Object exposing ``to_cmp`` (iterable of entry names) and
            ``to_add`` (mutable set that receives the changed names).

    Returns:
        None.
    """
    with tempfile.TemporaryDirectory() as tmpdir, \
            zipfile.ZipFile(old) as zinfile_old, \
            zipfile.ZipFile(new) as zinfile_new:
        old_dest = os.path.join(tmpdir, 'old')
        new_dest = os.path.join(tmpdir, 'new')
        os.mkdir(old_dest)
        os.mkdir(new_dest)
        for filename in cmpresult.to_cmp:
            # extract() sanitises the member name (leading slashes and '..'
            # components are dropped) and returns the path it really wrote.
            # Rebuilding the path with os.path.join(dest, filename) would
            # point at a non-existent file for such names.
            old_path = zinfile_old.extract(filename, path=old_dest)
            new_path = zinfile_new.extract(filename, path=new_dest)
            if not filecmp.cmp(old_path, new_path, shallow=False):
                cmpresult.to_add.add(filename)
def mkpatch(new, cmpresult, to_name):
    """Write the patch archive and the accompanying removal script.

    Creates ``to_name + '.zip'`` containing every entry named in
    ``cmpresult.to_add`` (copied from *new*) and ``to_name + '.sh'``, a
    shell script with one ``rm`` command per name in ``cmpresult.to_rm``.

    Args:
        new: New archive, passed to ``zipfile.ZipFile``.
        cmpresult: Object exposing ``to_add`` and ``to_rm`` collections of
            entry names.
        to_name: Output path prefix; ``.zip`` and ``.sh`` are appended.

    Returns:
        None.
    """
    with zipfile.ZipFile(new) as zinfile, \
            zipfile.ZipFile(to_name + '.zip', 'w') as zoutfile:
        # Sets have no stable iteration order; sorting makes repeated runs
        # produce the same archive layout.
        for filename in sorted(cmpresult.to_add):
            with zinfile.open(filename) as infile, \
                    zoutfile.open(filename, 'w') as outfile:
                shutil.copyfileobj(infile, outfile)
    with open(to_name + '.sh', 'w', encoding='utf-8') as outfile:
        outfile.write('#!/bin/sh\n')
        for filename in sorted(cmpresult.to_rm):
            # '--' ends option parsing, so an entry name that starts with
            # '-' is removed as a file instead of being read as an rm flag.
            outfile.write('rm -- {}\n'.format(shlex.quote(filename)))
def main():
    """Parse the command line, compare both archives and write the patch."""
    options = make_parser().parse_args()
    old_zip, new_zip = options.oldfile, options.newfile
    result = zipcmp(old_zip, new_zip)
    # The return value is unused; the same ``result`` object is handed on
    # to mkpatch() afterwards.
    compare_files(old_zip, new_zip, result)
    mkpatch(new_zip, result, options.toname)


if __name__ == '__main__':
    main()
我在编写一个比较两个 zip 文件的函数时遇到了问题(要判断它们的内容是否相同,而不仅仅是文件名相同)。这是我的代码示例:
def validate_zip_files(self):
    """Download the remote archive over FTP and compare it with the local copy.

    Connects to the hard-coded FTP server, changes into the ``test``
    directory, retrieves the remote file ``test`` into ``test.zip`` in the
    current working directory and compares that download byte-for-byte with
    ``/home/user/file/text.zip``.

    Returns:
        True if both files have identical contents, False otherwise.
    """
    host = '192.168.0.1'
    port = 2323
    username = '123'
    password = '123'
    downloaded_path = 'test.zip'
    local_path = '/home/user/file/text.zip'

    ftp = FTP()
    try:
        ftp.connect(host, port)
        ftp.login(username, password)
        ftp.cwd('test')
        print(ftp.pwd())
        # Close the download target before comparing so every byte is
        # flushed to disk.
        with open(downloaded_path, 'wb') as download:
            ftp.retrbinary('RETR test', download.write)
    finally:
        # Release the connection even when the transfer fails.
        ftp.close()

    # filecmp.cmp() takes paths, not file objects. The files must not be
    # reopened in 'wb' mode here: that would truncate both of them.
    # shallow=False forces a content comparison instead of relying on
    # os.stat() signatures.
    return filecmp.cmp(downloaded_path, local_path, shallow=False)
其中一个问题是,第二个 zip 文件位于另一个位置('/home/user/file/text.zip'),而我把 zip 文件下载到了 Python 脚本所在的目录中。另外,我也不能 100% 确定 filecmp.cmp 是否适用于 .zip 文件。
任何想法都会很棒 :) 谢谢。
我不会直接比较文件,而是比较两个文件的哈希值。这样就不必依赖 filecmp——正如您所说,它可能不适用于压缩文件。
import hashlib
def compare_files(a, b):
    """Return True when the files at paths *a* and *b* have equal SHA-256 digests.

    Args:
        a: Path of the first file.
        b: Path of the second file.

    Returns:
        True if both digests match, False otherwise.
    """
    def _sha256_of(path, chunk_size=65536):
        # Hash in fixed-size chunks so a large archive is never held in
        # memory at once; the ``with`` block closes the handle promptly.
        digest = hashlib.sha256()
        with open(path, 'rb') as stream:
            for chunk in iter(lambda: stream.read(chunk_size), b''):
                digest.update(chunk)
        return digest.digest()

    return _sha256_of(a) == _sha256_of(b)
请查看我的 gist:它会比较两个 zip 文件的内容,并生成从一个 zip 到另一个 zip 的补丁文件。例如,如果两个 zip 文件都包含某个条目但其内容不同,该 gist 能够发现它;如果两者包含的条目不同,它也能发现。该 gist 会忽略修改时间的差异。不过,如果您只关心浅层比较,那么 hashlib 可能是更好的选择。
gist 中的代码如下,供您参考:
import os
import argparse
import collections
import tempfile
import zipfile
import filecmp
import shutil
import shlex
# Result of comparing the entry names of an old and a new archive:
#   to_rm  - names found only in the old archive
#   to_cmp - names found in both archives
#   to_add - names found only in the new archive
ZipCmpResult = collections.namedtuple(
    'ZipCmpResult', 'to_rm to_cmp to_add')
def make_parser():
    """Build the command-line parser for the patch generator.

    Returns:
        An ``argparse.ArgumentParser`` accepting ``--oldfile``, ``--newfile``
        and ``--toname``, each defaulting to a path inside ``share``.
    """
    cli = argparse.ArgumentParser(
        description='Make patch zip file from two similar zip files.')
    # All three options share one help template; only the default's last
    # path component differs.
    for flag, leaf in (('--oldfile', 'old.zip'),
                       ('--newfile', 'new.zip'),
                       ('--toname', 'patch')):
        cli.add_argument(
            flag,
            default=os.path.join('share', leaf),
            help='default: %(default)s')
    return cli
def zipcmp(old, new):
    """Classify the entry names of two zip archives.

    Args:
        old: Old archive, passed to ``zipfile.ZipFile``.
        new: New archive, passed to ``zipfile.ZipFile``.

    Returns:
        A ``ZipCmpResult`` of three sets of entry names: those only in
        *old* (``to_rm``), those in both (``to_cmp``) and those only in
        *new* (``to_add``).
    """
    def _entry_names(archive):
        # Collect the archive's member names as a set.
        with zipfile.ZipFile(archive) as zf:
            return set(zf.namelist())

    before = _entry_names(old)
    after = _entry_names(new)
    return ZipCmpResult(
        before.difference(after),
        before.intersection(after),
        after.difference(before))
def compare_files(old, new, cmpresult):
    """Mark shared entries whose contents differ as needing to be patched.

    Every name in ``cmpresult.to_cmp`` is extracted from both archives into
    a temporary directory and the two extracted files are compared byte by
    byte. Names whose contents differ are added to ``cmpresult.to_add``,
    which is mutated in place.

    Args:
        old: Old archive, passed to ``zipfile.ZipFile``.
        new: New archive, passed to ``zipfile.ZipFile``.
        cmpresult: Object exposing ``to_cmp`` (iterable of entry names) and
            ``to_add`` (mutable set that receives the changed names).

    Returns:
        None.
    """
    with tempfile.TemporaryDirectory() as tmpdir, \
            zipfile.ZipFile(old) as zinfile_old, \
            zipfile.ZipFile(new) as zinfile_new:
        old_dest = os.path.join(tmpdir, 'old')
        new_dest = os.path.join(tmpdir, 'new')
        os.mkdir(old_dest)
        os.mkdir(new_dest)
        for filename in cmpresult.to_cmp:
            # extract() sanitises the member name (leading slashes and '..'
            # components are dropped) and returns the path it really wrote.
            # Rebuilding the path with os.path.join(dest, filename) would
            # point at a non-existent file for such names.
            old_path = zinfile_old.extract(filename, path=old_dest)
            new_path = zinfile_new.extract(filename, path=new_dest)
            if not filecmp.cmp(old_path, new_path, shallow=False):
                cmpresult.to_add.add(filename)
def mkpatch(new, cmpresult, to_name):
    """Write the patch archive and the accompanying removal script.

    Creates ``to_name + '.zip'`` containing every entry named in
    ``cmpresult.to_add`` (copied from *new*) and ``to_name + '.sh'``, a
    shell script with one ``rm`` command per name in ``cmpresult.to_rm``.

    Args:
        new: New archive, passed to ``zipfile.ZipFile``.
        cmpresult: Object exposing ``to_add`` and ``to_rm`` collections of
            entry names.
        to_name: Output path prefix; ``.zip`` and ``.sh`` are appended.

    Returns:
        None.
    """
    with zipfile.ZipFile(new) as zinfile, \
            zipfile.ZipFile(to_name + '.zip', 'w') as zoutfile:
        # Sets have no stable iteration order; sorting makes repeated runs
        # produce the same archive layout.
        for filename in sorted(cmpresult.to_add):
            with zinfile.open(filename) as infile, \
                    zoutfile.open(filename, 'w') as outfile:
                shutil.copyfileobj(infile, outfile)
    with open(to_name + '.sh', 'w', encoding='utf-8') as outfile:
        outfile.write('#!/bin/sh\n')
        for filename in sorted(cmpresult.to_rm):
            # '--' ends option parsing, so an entry name that starts with
            # '-' is removed as a file instead of being read as an rm flag.
            outfile.write('rm -- {}\n'.format(shlex.quote(filename)))
def main():
    """Parse the command line, compare both archives and write the patch."""
    options = make_parser().parse_args()
    old_zip, new_zip = options.oldfile, options.newfile
    result = zipcmp(old_zip, new_zip)
    # The return value is unused; the same ``result`` object is handed on
    # to mkpatch() afterwards.
    compare_files(old_zip, new_zip, result)
    mkpatch(new_zip, result, options.toname)


if __name__ == '__main__':
    main()