TypeError: list indices must be integers or slices, not str when using nested dictionaries
TypeError: list indices must be integers or slices, not str when using nested dictionaries
我正在使用嵌套字典创建本地存储的文本文件的倒排索引。倒排索引的抽象结构如下(取值为整数)。在任何单词中,键“0”的值是 idf,键“1”的值是 tf。
inverted_index={'word1':{'0':idf_value, '1': 2 , 'filename1': frequency_value, 'filename2': frequency_value},'word2':{'0':idf_value, '1': 2, 'filename1': frequency_value, 'filename2': frequency_value}}
这是代码:
import textract, math, os
docs=[]
# Read every file under ./input/ whose name ends with "txt" and store
# [filename, extracted content] pairs in docs.
folder = os.listdir("./input/")
for file in folder:
    if file.endswith("txt"):
        docs.append ([file,textract.process("./input/"+file)])
# Maps word -> inner dict of the form
# {'0': idf, '1': number of files containing the word, filename: occurrence count}.
inverted_index={}
for doc in docs:
    # doc[1] is decoded before splitting, so textract.process presumably
    # returns bytes -- confirm against the textract documentation.
    words=doc[1].decode()
    words=words.split(" ")
    # Loop through the words and build the inverted index.
    for word in words:
        temp={}
        # Skip the empty tokens that split(" ") yields for leading or repeated spaces.
        if (word == " ") or (word==""):
            continue
        if word not in inverted_index:
            # First time this word is seen in any file.
            temp[doc[0]]=1
            temp['0']=0 # idf placeholder; filled in by the final loop
            temp['1']=1 # called "tf" in the question; incremented once per new file containing the word
            inverted_index[word]=temp
        else:
            if doc[0] not in inverted_index[word].keys():
                # Known word, first occurrence in this file.
                inverted_index[word][doc[0]]=1
                inverted_index[word]['1']=inverted_index[word]['1']+1
            else:
                # Known word, seen again in the same file.
                inverted_index[word][doc[0]]=inverted_index[word][doc[0]]+1
# Sort and print the entries, calculating the idf on the fly.
for key, value in sorted(inverted_index.items()): # sort words alphabetically
    # NOTE: sorted() applied to a dict returns a list of its keys, so the next
    # line replaces the inner dict with a list.
    inverted_index[key]=sorted(inverted_index[key]) # meant to sort the filenames where the word occurred
    inverted_index[key]['0']=math.log2(len(docs)/value['1']) # the error in this line
    print(key, value)
但我在倒数第二行收到此错误:
Traceback (most recent call last):
File "aaaa.py", line 34, in <module>
inverted_index[key]['0']=math.log2(len(docs)/value['1'])
TypeError: list indices must be integers or slices, not str
你能帮我修复这个错误吗?谢谢
错误来自 inverted_index[key]['0']。
原因是 inverted_index[key] = sorted(inverted_index[key])
把内部字典替换成了由其键组成的已排序列表,此时打印它会得到:
print(inverted_index[key])
# becomes ['0', '1', 'filename1', 'filename2']
因此会触发 TypeError,因为列表不能用字符串作为索引。
要修改每个单词对应的内部字典中 '0' 键的值,你可以试试下面的代码:
# Walk the words in alphabetical order. sorted() returns a new list of
# (word, inner_dict) pairs, so inverted_index can be updated inside the loop.
for key, value in sorted(inverted_index.items()):
    # `value` is the inner dict itself (not a copy), so assigning through it
    # updates the entry stored in inverted_index.
    value['0'] = 'some_value'
    # sorted(inner_dict) would return a plain list of keys and drop the counts.
    # Rebuild a dict from the sorted items instead, so the keys (including the
    # filenames) end up in sorted order; dicts keep insertion order on 3.7+.
    inverted_index[key] = dict(sorted(value.items()))
print(inverted_index)
这对我有用
# Walk the words in alphabetical order. sorted() returns a new list of
# (word, inner_dict) pairs, so inverted_index can be updated inside the loop.
for key, value in sorted(inverted_index.items()):
    # idf = log2(total number of files / value stored under '1').
    # `value` is the inner dict itself, so this updates the stored entry.
    value['0'] = math.log2(len(docs) / value['1'])
    # sorted(inner_dict) would return a plain list of keys and drop the counts.
    # Rebuild a dict from the sorted items instead, so the keys (including the
    # filenames) end up in sorted order; dicts keep insertion order on 3.7+.
    ordered = dict(sorted(value.items()))
    inverted_index[key] = ordered
    print(key, ordered)
我正在使用嵌套字典创建本地存储的文本文件的倒排索引。倒排索引的抽象结构如下(取值为整数)。在任何单词中,键“0”的值是 idf,键“1”的值是 tf。
inverted_index={'word1':{'0':idf_value, '1': 2 , 'filename1': frequency_value, 'filename2': frequency_value},'word2':{'0':idf_value, '1': 2, 'filename1': frequency_value, 'filename2': frequency_value}}
这是代码:
import textract, math, os
docs=[]
# Read every file under ./input/ whose name ends with "txt" and store
# [filename, extracted content] pairs in docs.
folder = os.listdir("./input/")
for file in folder:
    if file.endswith("txt"):
        docs.append ([file,textract.process("./input/"+file)])
# Maps word -> inner dict of the form
# {'0': idf, '1': number of files containing the word, filename: occurrence count}.
inverted_index={}
for doc in docs:
    # doc[1] is decoded before splitting, so textract.process presumably
    # returns bytes -- confirm against the textract documentation.
    words=doc[1].decode()
    words=words.split(" ")
    # Loop through the words and build the inverted index.
    for word in words:
        temp={}
        # Skip the empty tokens that split(" ") yields for leading or repeated spaces.
        if (word == " ") or (word==""):
            continue
        if word not in inverted_index:
            # First time this word is seen in any file.
            temp[doc[0]]=1
            temp['0']=0 # idf placeholder; filled in by the final loop
            temp['1']=1 # called "tf" in the question; incremented once per new file containing the word
            inverted_index[word]=temp
        else:
            if doc[0] not in inverted_index[word].keys():
                # Known word, first occurrence in this file.
                inverted_index[word][doc[0]]=1
                inverted_index[word]['1']=inverted_index[word]['1']+1
            else:
                # Known word, seen again in the same file.
                inverted_index[word][doc[0]]=inverted_index[word][doc[0]]+1
# Sort and print the entries, calculating the idf on the fly.
for key, value in sorted(inverted_index.items()): # sort words alphabetically
    # NOTE: sorted() applied to a dict returns a list of its keys, so the next
    # line replaces the inner dict with a list.
    inverted_index[key]=sorted(inverted_index[key]) # meant to sort the filenames where the word occurred
    inverted_index[key]['0']=math.log2(len(docs)/value['1']) # the error in this line
    print(key, value)
但我在倒数第二行收到此错误:
Traceback (most recent call last):
File "aaaa.py", line 34, in <module>
inverted_index[key]['0']=math.log2(len(docs)/value['1'])
TypeError: list indices must be integers or slices, not str
你能帮我修复这个错误吗?谢谢
错误来自 inverted_index[key]['0']。
原因是 inverted_index[key] = sorted(inverted_index[key])
把内部字典替换成了由其键组成的已排序列表,此时打印它会得到:
print(inverted_index[key])
# becomes ['0', '1', 'filename1', 'filename2']
因此会触发 TypeError,因为列表不能用字符串作为索引。
要修改每个单词对应的内部字典中 '0' 键的值,你可以试试下面的代码:
# Walk the words in alphabetical order. sorted() returns a new list of
# (word, inner_dict) pairs, so inverted_index can be updated inside the loop.
for key, value in sorted(inverted_index.items()):
    # `value` is the inner dict itself (not a copy), so assigning through it
    # updates the entry stored in inverted_index.
    value['0'] = 'some_value'
    # sorted(inner_dict) would return a plain list of keys and drop the counts.
    # Rebuild a dict from the sorted items instead, so the keys (including the
    # filenames) end up in sorted order; dicts keep insertion order on 3.7+.
    inverted_index[key] = dict(sorted(value.items()))
print(inverted_index)
这对我有用
# Walk the words in alphabetical order. sorted() returns a new list of
# (word, inner_dict) pairs, so inverted_index can be updated inside the loop.
for key, value in sorted(inverted_index.items()):
    # idf = log2(total number of files / value stored under '1').
    # `value` is the inner dict itself, so this updates the stored entry.
    value['0'] = math.log2(len(docs) / value['1'])
    # sorted(inner_dict) would return a plain list of keys and drop the counts.
    # Rebuild a dict from the sorted items instead, so the keys (including the
    # filenames) end up in sorted order; dicts keep insertion order on 3.7+.
    ordered = dict(sorted(value.items()))
    inverted_index[key] = ordered
    print(key, ordered)