How to create a hash for each file with hashlib and put it into a dictionary
I have 4 files in one folder: hash.py, size_1.py, size_2.py and size.py. size_1.py and size_2.py are the same size.
I am creating a hash for each file with hashlib:
import hashlib, os, sys
result = {}
for root, dirs, files in os.walk(".", topdown=True):
    for name in files:
        #print(os.path.join(root, name))
        FileName = (os.path.join(root, name))
        hasher = hashlib.md5()
        with open(str(FileName), 'rb') as afile:
            buf = afile.read()
            hasher.update(buf)
            file_hash = (afile, hasher.hexdigest())
            #print (file_hash)
            result[file_hash[1]] = file_hash[0]
            #if file_hash[1] in result:
            #    result[file_hash[1]].append(file_hash[0])
            #else:
            #    result[file_hash[1]] = file_hash[0]
print (result)
My output:
{'e12d780eba6e03a7c1cafa394ef9f31f': <_io.BufferedReader name='./size.py'>, '49eb7137273ec333727ea0f5279fe040': <_io.BufferedReader name='./size_1.py'>, '35e93b380f084d5187976beae746492e': <_io.BufferedReader name='./hash.py'>}
What I want:
{'e12d780eba6e03a7c1cafa394ef9f31f': ['./size.py'], '49eb7137273ec333727ea0f5279fe040': ['./size_1.py', './size_2.py'], '35e93b380f084d5187976beae746492e': ['./hash.py']}
Two changes are needed here:
remove the <_io.BufferedReader name= objects and keep only the file paths
store the paths as lists in the dictionary
I'm not sure why you are creating a tuple where one isn't needed.
import hashlib, os, sys
result = {}
for root, dirs, files in os.walk('.', topdown=True):
    for name in files:
        hasher = hashlib.md5()
        fn = os.path.join(root, name)
        with open(fn, 'rb') as afile:
            buf = afile.read()
            hasher.update(buf)
        file_hash = hasher.hexdigest()
        if fn in result:
            result[fn].append(file_hash)
        else:
            result[fn] = [file_hash]
Thanks @olvin. Keying on the hash instead of the file path gives the grouping I wanted:
import hashlib, os, sys
result = {}
for root, dirs, files in os.walk('.', topdown=True):
    for name in files:
        hasher = hashlib.md5()
        fn = os.path.join(root, name)
        with open(fn, 'rb') as afile:
            buf = afile.read()
            hasher.update(buf)
        file_hash = hasher.hexdigest()
        if file_hash in result:
            result[file_hash].append(fn)
        else:
            result[file_hash] = [fn]
print (result)
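As a small refinement, the same grouping can be written with collections.defaultdict, and hashing the file in chunks avoids reading a large file into memory in one go. This is only a sketch along the lines of the corrected code above; the 4096-byte chunk size and the duplicate filter at the end are my own additions, not part of the original question:

import hashlib
import os
from collections import defaultdict

result = defaultdict(list)  # md5 hex digest -> list of file paths

for root, dirs, files in os.walk('.', topdown=True):
    for name in files:
        fn = os.path.join(root, name)
        hasher = hashlib.md5()
        with open(fn, 'rb') as afile:
            # feed the file to the hasher in chunks instead of one big read()
            for chunk in iter(lambda: afile.read(4096), b''):
                hasher.update(chunk)
        result[hasher.hexdigest()].append(fn)

print(dict(result))

# hashes shared by more than one file, i.e. candidate duplicates
duplicates = {h: paths for h, paths in result.items() if len(paths) > 1}
print(duplicates)

Assuming size_1.py and size_2.py really have identical content (which the desired output suggests), the second print would show just their shared hash mapped to both paths.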