python 中比较文件、检查文件名的路径、文件大小和校验和 md5
Comparing files, path to check filenames, filesize, and checksum md5 in python
我想比较具有路径、文件名、文件大小和 md5 校验和的目录中的文件。当我独立检查文件名、文件大小和 md5 校验和时,我得到了三个可以正常工作的函数。我认为问题在于我如何设置另一个函数来处理包含 csv 文件项的字典。这是要比较的 csv 文件。
|Path|Filename|File Size|Hash
|/var/tmp/test|test1.txt|257|2e6041635f72233f4cdf6fbfb0a8288e
|/var/tmp/test|text2.txt|68|d3428d5910f54270d62ff57ccd5ff52c
|/var/tmp/test|text3.txt|58|42e8b3cba5320e07745110b8b193f534
|/var/tmp/test|text4.xml|128|4acc96e6e8b9006722408e15e555d2c2
|/var/tmp/test|text5.csv|214|a7071c13195d8485b2fb4a68503cbd7a
我已经尝试修改 md5、文件名、文件大小及其在目录中的循环方式,但似乎有问题。
def csv_checksum(files, path):
# Get column with delimiter
csv.register_dialect('myDialect', delimiter = '|')
csvDics = {}
# Open file, read them, and output csv formatted
with open(files, 'r') as f:
reader = csv.reader(f, dialect='myDialect')
for row in reader:
if reader.line_num == 1:
continue
csvDic = {
'Directory': row[1],
'Filename': row[2],
'File Size': row[3],
'Hash': row[4]
}
csvDics.update(csvDic)
print(csvDics)
comp_original(csvDics, path)
def comp_original(dic, path):
for (dirpath, dirnames, filenames) in os.walk(path):
for files in filenames:
if (dic.get('Directory') == path
and dic.get('Filename') == get_filename(files)
and dic.get('File Size') == get_filesize(files)
and dic.get('Hash') == get_md5(files)):
print("All files matches")
return True
def get_filename(fname):
filename = os.path.basename(fname)
return filename
def get_filesize(fname):
stat_info = os.stat(fname)
file_size = stat_info.st_size
return file_size
def get_md5(fname):
hash_md5 = hashlib.md5()
with open(fname, "rb") as f:
for chunk in iter(lambda: f.read(2 ** 20), b""):
hash_md5.update(chunk)
get_hash = hash_md5.hexdigest()
return get_hash
对于文件名,它通过循环但打印出 3 个不匹配为 No matches
,一个为 All files matches
,所有这些都应该匹配。然后对于文件大小和 get_md5,我得到 OSError: [Errno 2] No such file or directory: 'text3.txt'
文件名问题:
{'Directory': '/var/tmp/test', 'File Size': '257', 'Hash': '2e6041635f72233f4cdf6fbfb0a8288e', 'Filename': 'test1.txt'}
{'Directory': '/var/tmp/test', 'File Size': '68', 'Hash': 'd3428d5910f54270d62ff57ccd5ff52c', 'Filename': 'text2.txt'}
{'Directory': '/var/tmp/test', 'File Size': '58', 'Hash': '42e8b3cba5320e07745110b8b193f534', 'Filename': 'text3.txt'}
{'Directory': '/var/tmp/test', 'File Size': '128', 'Hash': '4acc96e6e8b9006722408e15e555d2c2', 'Filename': 'text4.xml'}
{'Directory': '/var/tmp/test', 'File Size': '214', 'Hash': 'a7071c13195d8485b2fb4a68503cbd7a', 'Filename': 'text5.csv'}
No matches
No matches
No matches
All files matches
文件大小:
File "./create_manifest.py", line 44, in csv_checksum
comp_baseline_manifest(csvDics, path)
File "./create_manifest.py", line 88, in comp_baseline_manifest
and dic.get('File Size') == get_filesize(files)):
File "./create_manifest.py", line 100, in get_filesize
stat_info = os.stat(fname)
OSError: [Errno 2] No such file or directory: 'text3.txt'
对于 md5 错误:
comp_baseline_manifest(csvDics, path)
File "./create_manifest.py", line 89, in comp_baseline_manifest
and dic.get('Hash') == get_md5(files)):
File "./create_manifest.py", line 107, in get_md5
with open(fname, "rb") as f:
IOError: [Errno 2] No such file or directory: 'text3.txt'
而不是这个:
for (dirpath, dirnames, filenames) in os.walk(path):
for files in filenames:
if (dic.get('Directory') == path
and dic.get('Filename') == get_filename(files)
and dic.get('File Size') == get_filesize(files)
and dic.get('Hash') == get_md5(files)):
你应该用过:
for root, dirs, files in os.walk(path):
for f in files:
file_name = os.path.join( root, f ) # <<--- this is important
if (dic.get('Directory') == path # `root` here, not `path` ??
and dic.get('Filename') == get_filename(file_name)
and dic.get('File Size') == get_filesize(file_name)
and dic.get('Hash') == get_md5(file_name)):
我想比较具有路径、文件名、文件大小和 md5 校验和的目录中的文件。当我独立检查文件名、文件大小和 md5 校验和时,我得到了三个可以正常工作的函数。我认为问题在于我如何设置另一个函数来处理包含 csv 文件项的字典。这是要比较的 csv 文件。
|Path|Filename|File Size|Hash
|/var/tmp/test|test1.txt|257|2e6041635f72233f4cdf6fbfb0a8288e
|/var/tmp/test|text2.txt|68|d3428d5910f54270d62ff57ccd5ff52c
|/var/tmp/test|text3.txt|58|42e8b3cba5320e07745110b8b193f534
|/var/tmp/test|text4.xml|128|4acc96e6e8b9006722408e15e555d2c2
|/var/tmp/test|text5.csv|214|a7071c13195d8485b2fb4a68503cbd7a
我已经尝试修改 md5、文件名、文件大小及其在目录中的循环方式,但似乎有问题。
def csv_checksum(files, path):
# Get column with delimiter
csv.register_dialect('myDialect', delimiter = '|')
csvDics = {}
# Open file, read them, and output csv formatted
with open(files, 'r') as f:
reader = csv.reader(f, dialect='myDialect')
for row in reader:
if reader.line_num == 1:
continue
csvDic = {
'Directory': row[1],
'Filename': row[2],
'File Size': row[3],
'Hash': row[4]
}
csvDics.update(csvDic)
print(csvDics)
comp_original(csvDics, path)
def comp_original(dic, path):
for (dirpath, dirnames, filenames) in os.walk(path):
for files in filenames:
if (dic.get('Directory') == path
and dic.get('Filename') == get_filename(files)
and dic.get('File Size') == get_filesize(files)
and dic.get('Hash') == get_md5(files)):
print("All files matches")
return True
def get_filename(fname):
filename = os.path.basename(fname)
return filename
def get_filesize(fname):
stat_info = os.stat(fname)
file_size = stat_info.st_size
return file_size
def get_md5(fname):
hash_md5 = hashlib.md5()
with open(fname, "rb") as f:
for chunk in iter(lambda: f.read(2 ** 20), b""):
hash_md5.update(chunk)
get_hash = hash_md5.hexdigest()
return get_hash
对于文件名,它通过循环但打印出 3 个不匹配为 No matches
,一个为 All files matches
,所有这些都应该匹配。然后对于文件大小和 get_md5,我得到 OSError: [Errno 2] No such file or directory: 'text3.txt'
文件名问题:
{'Directory': '/var/tmp/test', 'File Size': '257', 'Hash': '2e6041635f72233f4cdf6fbfb0a8288e', 'Filename': 'test1.txt'}
{'Directory': '/var/tmp/test', 'File Size': '68', 'Hash': 'd3428d5910f54270d62ff57ccd5ff52c', 'Filename': 'text2.txt'}
{'Directory': '/var/tmp/test', 'File Size': '58', 'Hash': '42e8b3cba5320e07745110b8b193f534', 'Filename': 'text3.txt'}
{'Directory': '/var/tmp/test', 'File Size': '128', 'Hash': '4acc96e6e8b9006722408e15e555d2c2', 'Filename': 'text4.xml'}
{'Directory': '/var/tmp/test', 'File Size': '214', 'Hash': 'a7071c13195d8485b2fb4a68503cbd7a', 'Filename': 'text5.csv'}
No matches
No matches
No matches
All files matches
文件大小:
File "./create_manifest.py", line 44, in csv_checksum
comp_baseline_manifest(csvDics, path)
File "./create_manifest.py", line 88, in comp_baseline_manifest
and dic.get('File Size') == get_filesize(files)):
File "./create_manifest.py", line 100, in get_filesize
stat_info = os.stat(fname)
OSError: [Errno 2] No such file or directory: 'text3.txt'
对于 md5 错误:
comp_baseline_manifest(csvDics, path)
File "./create_manifest.py", line 89, in comp_baseline_manifest
and dic.get('Hash') == get_md5(files)):
File "./create_manifest.py", line 107, in get_md5
with open(fname, "rb") as f:
IOError: [Errno 2] No such file or directory: 'text3.txt'
而不是这个:
for (dirpath, dirnames, filenames) in os.walk(path):
for files in filenames:
if (dic.get('Directory') == path
and dic.get('Filename') == get_filename(files)
and dic.get('File Size') == get_filesize(files)
and dic.get('Hash') == get_md5(files)):
你应该用过:
for root, dirs, files in os.walk(path):
for f in files:
file_name = os.path.join( root, f ) # <<--- this is important
if (dic.get('Directory') == path # `root` here, not `path` ??
and dic.get('Filename') == get_filename(file_name)
and dic.get('File Size') == get_filesize(file_name)
and dic.get('Hash') == get_md5(file_name)):