AttributeError: '' object has no attribute '' - in-class declared variable is not recognized
AttributeError: '' object has no attribute '' - in-class declared variable is not recognized
对于给定的标记化文本语料库,我想使用多种加权技术来执行单词加权。为此,我创建了以下 class:
class Weighing:
def __init__(self, input_file, word_weighing):
self.input_file_ = input_file #List in which each element is a list of tokens
self.word_weighing_ = word_weighing
self.num_documents = len(self.input_file_)
#Set with all unique words from the corpus
self.vocabulary = set()
for text in self.input_file_:
self.vocabulary.update(text)
self.vocabulary_size = len(self.vocabulary)
#Create dictionary that returns index for a token or token for an index of the corpus' vocabulary
self.word_to_index = dict()
self.index_to_word = dict()
for i, word in enumerate(self.vocabulary):
self.word_to_index[word] = i
self.index_to_word[i] = word
#Create sparse Document-Term Matrix (DTM)
self.sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for doc_index, document in enumerate(self.input_file_):
document_counter = Counter(document)
for word in set(document):
self.sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word] # Update element
#Get word count for all documents to calculate sparse_p_ij
self.sum_words = Counter()
for doc in self.input_file_:
self.sum_words.update(Counter(doc))
#Create probability of word i in document j. Format: sparse matrix
def create_sparse_p_ij (self):
sparse_p_ij = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for j in range(self.num_documents):
row_counts = self.sparse_dtm.getrow(j).toarray()[0]
word_index = row_counts.nonzero()[0]
non_zero_row_counts = row_counts[row_counts != 0]
for i, count in enumerate(non_zero_row_counts):
word = self.index_to_word[word_index[i]]
prob_ij = count/self.sum_words[word]
sparse_p_ij[j,word_index[i]] = prob_ij
return sparse_p_ij
#Create a binary sparse dtm. Format: sparse matrix
def create_sparse_binary_dtm(self):
binary_sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for doc_index, document in enumerate(self.input_file_):
document_counter = dict.fromkeys(document, 1)
for word in set(document):
binary_sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word] # Update element
return binary_sparse_dtm
#2) Calculate Global Term weighting (4 methods: entropy, IDF, Probabilistic IDF, Normal)
def calc_entropy(self):
sparse_p_ij = self.create_sparse_p_ij()
summed_word_probabilities = sparse_p_ij.sum(0).tolist()[0]
return np.array([1+((word_probability * np.log2(word_probability))/np.log2(self.num_documents)) for word_probability in summed_word_probabilities])
def calc_idf(self):
summed_words = self.sparse_dtm.sum(0).tolist()[0]
return np.array([np.log2(self.num_documents/word_count) for word_count in summed_words])
def calc_normal(self):
summed_words = self.sparse_dtm.sum(0).tolist()[0]
return np.array([1/(math.sqrt(word_count**2)) for word_count in summed_words])
def calc_probidf (self):
binary_sparse_dtm = self.create_sparse_binary_dtm()
summed_binary_words_list = binary_sparse_dtm.sum(0).tolist()[0]
return np.array([np.log2((self.num_documents - binary_word_count)/binary_word_count) for binary_word_count in summed_binary_words_list])
if self.word_weighing_ == 1:
gtw = self.calc_entropy()
elif self.word_weighing_ == 2:
gtw = self.calc_idf()
elif self.word_weighing_ == 3:
gtw = self.calc_normal()
elif self.word_weighing_ == 4:
gtw = self.calc_probidf()
现在,当我 运行:
model = Weighing(input_file = data_list,
word_weighing = 1)
带有 data_list
的是带有标记化单词的列表列表。
我收到以下错误:
Traceback (most recent call last):
File "<ipython-input-621-b0a9caec82d4>", line 4, in <module>
word_weighing = 1)
File "<ipython-input-617-6f3fdcecd170>", line 90, in __init__
gtw = self.calc_entropy()
AttributeError: 'Weighing' object has no attribute 'calc_entropy'
我查看了其他一些类似的 SO 链接,2,,,但其中 none 似乎适用于此。
我该怎么做才能克服这个错误?
编辑:
我已将代码更新为:
class Weighing:
def __init__(self, input_file, word_weighing):
self.input_file_ = input_file #List in which each element is a list of tokens
self.word_weighing_ = word_weighing
self.num_documents = len(self.input_file_)
#Set with all unique words from the corpus
self.vocabulary = set()
for text in self.input_file_:
self.vocabulary.update(text)
self.vocabulary_size = len(self.vocabulary)
#Create dictionary that returns index for a token or token for an index of the corpus' vocabulary
self.word_to_index = dict()
self.index_to_word = dict()
for i, word in enumerate(self.vocabulary):
self.word_to_index[word] = i
self.index_to_word[i] = word
#Create sparse Document-Term Matrix (DTM)
self.sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for doc_index, document in enumerate(self.input_file_):
document_counter = Counter(document)
for word in set(document):
self.sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word] # Update element
if self.word_weighing_ == 1:
self.gtw = self.calc_entropy()
elif self.word_weighing_ == 2:
self.gtw = self.calc_idf()
elif self.word_weighing_ == 3:
self.gtw = self.calc_normal()
elif self.word_weighing_ == 4:
self.gtw = self.calc_probidf()
#Get word count for all documents to calculate sparse_p_ij
self.sum_words = Counter()
for doc in self.input_file_:
self.sum_words.update(Counter(doc))
#Create probability of word i in document j. Format: sparse matrix
def create_sparse_p_ij (self):
sparse_p_ij = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for j in range(self.num_documents):
row_counts = self.sparse_dtm.getrow(j).toarray()[0]
word_index = row_counts.nonzero()[0]
non_zero_row_counts = row_counts[row_counts != 0]
for i, count in enumerate(non_zero_row_counts):
word = self.index_to_word[word_index[i]]
prob_ij = count/self.sum_words[word]
sparse_p_ij[j,word_index[i]] = prob_ij
return sparse_p_ij
#Create a binary sparse dtm. Format: sparse matrix
def create_sparse_binary_dtm(self):
binary_sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for doc_index, document in enumerate(self.input_file_):
document_counter = dict.fromkeys(document, 1)
for word in set(document):
binary_sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word] # Update element
return binary_sparse_dtm
#2) Calculate Global Term weighting (4 methods: entropy, IDF, Probabilistic IDF, Normal)
def calc_entropy(self):
sparse_p_ij = self.create_sparse_p_ij()
summed_word_probabilities = sparse_p_ij.sum(0).tolist()[0]
return np.array([1+((word_probability * np.log2(word_probability))/np.log2(self.num_documents)) for word_probability in summed_word_probabilities])
def calc_idf(self):
summed_words = self.sparse_dtm.sum(0).tolist()[0]
return np.array([np.log2(self.num_documents/word_count) for word_count in summed_words])
def calc_normal(self):
summed_words = self.sparse_dtm.sum(0).tolist()[0]
return np.array([1/(math.sqrt(word_count**2)) for word_count in summed_words])
def calc_probidf (self):
binary_sparse_dtm = self.create_sparse_binary_dtm()
summed_binary_words_list = binary_sparse_dtm.sum(0).tolist()[0]
return np.array([np.log2((self.num_documents - binary_word_count)/binary_word_count) for binary_word_count in summed_binary_words_list])
但是,我仍然得到错误:
AttributeError: 'Weighing' object has no attribute 'calc_entropy'
现在,我在初始化函数之前调用它。如何更改我的代码以便在初始化 self.gtw
之前初始化 def calc_entropy
?
这似乎是一个缩进问题:您在 __init__()
函数中而不是在 class.
中定义方法函数,如 calc_entropy()
应该是:
class Weighing:
def __init__(self):
# your init
def calc_entropy(self):
# your method
对于给定的标记化文本语料库,我想使用多种加权技术来执行单词加权。为此,我创建了以下 class:
class Weighing:
def __init__(self, input_file, word_weighing):
self.input_file_ = input_file #List in which each element is a list of tokens
self.word_weighing_ = word_weighing
self.num_documents = len(self.input_file_)
#Set with all unique words from the corpus
self.vocabulary = set()
for text in self.input_file_:
self.vocabulary.update(text)
self.vocabulary_size = len(self.vocabulary)
#Create dictionary that returns index for a token or token for an index of the corpus' vocabulary
self.word_to_index = dict()
self.index_to_word = dict()
for i, word in enumerate(self.vocabulary):
self.word_to_index[word] = i
self.index_to_word[i] = word
#Create sparse Document-Term Matrix (DTM)
self.sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for doc_index, document in enumerate(self.input_file_):
document_counter = Counter(document)
for word in set(document):
self.sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word] # Update element
#Get word count for all documents to calculate sparse_p_ij
self.sum_words = Counter()
for doc in self.input_file_:
self.sum_words.update(Counter(doc))
#Create probability of word i in document j. Format: sparse matrix
def create_sparse_p_ij (self):
sparse_p_ij = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for j in range(self.num_documents):
row_counts = self.sparse_dtm.getrow(j).toarray()[0]
word_index = row_counts.nonzero()[0]
non_zero_row_counts = row_counts[row_counts != 0]
for i, count in enumerate(non_zero_row_counts):
word = self.index_to_word[word_index[i]]
prob_ij = count/self.sum_words[word]
sparse_p_ij[j,word_index[i]] = prob_ij
return sparse_p_ij
#Create a binary sparse dtm. Format: sparse matrix
def create_sparse_binary_dtm(self):
binary_sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for doc_index, document in enumerate(self.input_file_):
document_counter = dict.fromkeys(document, 1)
for word in set(document):
binary_sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word] # Update element
return binary_sparse_dtm
#2) Calculate Global Term weighting (4 methods: entropy, IDF, Probabilistic IDF, Normal)
def calc_entropy(self):
sparse_p_ij = self.create_sparse_p_ij()
summed_word_probabilities = sparse_p_ij.sum(0).tolist()[0]
return np.array([1+((word_probability * np.log2(word_probability))/np.log2(self.num_documents)) for word_probability in summed_word_probabilities])
def calc_idf(self):
summed_words = self.sparse_dtm.sum(0).tolist()[0]
return np.array([np.log2(self.num_documents/word_count) for word_count in summed_words])
def calc_normal(self):
summed_words = self.sparse_dtm.sum(0).tolist()[0]
return np.array([1/(math.sqrt(word_count**2)) for word_count in summed_words])
def calc_probidf (self):
binary_sparse_dtm = self.create_sparse_binary_dtm()
summed_binary_words_list = binary_sparse_dtm.sum(0).tolist()[0]
return np.array([np.log2((self.num_documents - binary_word_count)/binary_word_count) for binary_word_count in summed_binary_words_list])
if self.word_weighing_ == 1:
gtw = self.calc_entropy()
elif self.word_weighing_ == 2:
gtw = self.calc_idf()
elif self.word_weighing_ == 3:
gtw = self.calc_normal()
elif self.word_weighing_ == 4:
gtw = self.calc_probidf()
现在,当我 运行:
model = Weighing(input_file = data_list,
word_weighing = 1)
带有 data_list
的是带有标记化单词的列表列表。
我收到以下错误:
Traceback (most recent call last):
File "<ipython-input-621-b0a9caec82d4>", line 4, in <module>
word_weighing = 1)
File "<ipython-input-617-6f3fdcecd170>", line 90, in __init__
gtw = self.calc_entropy()
AttributeError: 'Weighing' object has no attribute 'calc_entropy'
我查看了其他一些类似的 SO 链接
我该怎么做才能克服这个错误?
编辑:
我已将代码更新为:
class Weighing:
def __init__(self, input_file, word_weighing):
self.input_file_ = input_file #List in which each element is a list of tokens
self.word_weighing_ = word_weighing
self.num_documents = len(self.input_file_)
#Set with all unique words from the corpus
self.vocabulary = set()
for text in self.input_file_:
self.vocabulary.update(text)
self.vocabulary_size = len(self.vocabulary)
#Create dictionary that returns index for a token or token for an index of the corpus' vocabulary
self.word_to_index = dict()
self.index_to_word = dict()
for i, word in enumerate(self.vocabulary):
self.word_to_index[word] = i
self.index_to_word[i] = word
#Create sparse Document-Term Matrix (DTM)
self.sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for doc_index, document in enumerate(self.input_file_):
document_counter = Counter(document)
for word in set(document):
self.sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word] # Update element
if self.word_weighing_ == 1:
self.gtw = self.calc_entropy()
elif self.word_weighing_ == 2:
self.gtw = self.calc_idf()
elif self.word_weighing_ == 3:
self.gtw = self.calc_normal()
elif self.word_weighing_ == 4:
self.gtw = self.calc_probidf()
#Get word count for all documents to calculate sparse_p_ij
self.sum_words = Counter()
for doc in self.input_file_:
self.sum_words.update(Counter(doc))
#Create probability of word i in document j. Format: sparse matrix
def create_sparse_p_ij (self):
sparse_p_ij = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for j in range(self.num_documents):
row_counts = self.sparse_dtm.getrow(j).toarray()[0]
word_index = row_counts.nonzero()[0]
non_zero_row_counts = row_counts[row_counts != 0]
for i, count in enumerate(non_zero_row_counts):
word = self.index_to_word[word_index[i]]
prob_ij = count/self.sum_words[word]
sparse_p_ij[j,word_index[i]] = prob_ij
return sparse_p_ij
#Create a binary sparse dtm. Format: sparse matrix
def create_sparse_binary_dtm(self):
binary_sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
for doc_index, document in enumerate(self.input_file_):
document_counter = dict.fromkeys(document, 1)
for word in set(document):
binary_sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word] # Update element
return binary_sparse_dtm
#2) Calculate Global Term weighting (4 methods: entropy, IDF, Probabilistic IDF, Normal)
def calc_entropy(self):
sparse_p_ij = self.create_sparse_p_ij()
summed_word_probabilities = sparse_p_ij.sum(0).tolist()[0]
return np.array([1+((word_probability * np.log2(word_probability))/np.log2(self.num_documents)) for word_probability in summed_word_probabilities])
def calc_idf(self):
summed_words = self.sparse_dtm.sum(0).tolist()[0]
return np.array([np.log2(self.num_documents/word_count) for word_count in summed_words])
def calc_normal(self):
summed_words = self.sparse_dtm.sum(0).tolist()[0]
return np.array([1/(math.sqrt(word_count**2)) for word_count in summed_words])
def calc_probidf (self):
binary_sparse_dtm = self.create_sparse_binary_dtm()
summed_binary_words_list = binary_sparse_dtm.sum(0).tolist()[0]
return np.array([np.log2((self.num_documents - binary_word_count)/binary_word_count) for binary_word_count in summed_binary_words_list])
但是,我仍然得到错误:
AttributeError: 'Weighing' object has no attribute 'calc_entropy'
现在,我在初始化函数之前调用它。如何更改我的代码以便在初始化 self.gtw
之前初始化 def calc_entropy
?
这似乎是一个缩进问题:您在 __init__()
函数中而不是在 class.
calc_entropy()
应该是:
class Weighing:
def __init__(self):
# your init
def calc_entropy(self):
# your method