AttributeError: 'Weighing' object has no attribute 'calc_entropy' - in-class declared method is not recognized

对于给定的标记化文本语料库,我想使用多种加权技术来执行单词加权。为此,我创建了以下 class:

class Weighing:
    """Word-weighting helper over a tokenized corpus (original, buggy version
    from the question).

    NOTE(review): every ``def`` below (``create_sparse_p_ij``, ``calc_entropy``,
    ``calc_idf``, ``calc_normal``, ``calc_probidf``) is indented INSIDE
    ``__init__``, so each one is a local function of ``__init__`` — not a
    method of the class.  ``self.calc_entropy()`` at the bottom therefore
    raises ``AttributeError: 'Weighing' object has no attribute
    'calc_entropy'``, exactly as the traceback shows.
    """
    def __init__(self, input_file, word_weighing):
        # input_file: list in which each element is a list of tokens (one document).
        # word_weighing: integer selector for the global weighting scheme (1-4).
        self.input_file_ = input_file #List in which each element is a list of tokens
        self.word_weighing_ = word_weighing
        self.num_documents = len(self.input_file_)
        
        #Set with all unique words from the corpus
        self.vocabulary = set()
        for text in self.input_file_:  
            self.vocabulary.update(text)
        self.vocabulary_size = len(self.vocabulary)
        
        #Create dictionary that returns index for a token or token for an index of the corpus' vocabulary
        self.word_to_index = dict()
        self.index_to_word = dict()
        for i, word in enumerate(self.vocabulary):
            self.word_to_index[word] = i
            self.index_to_word[i] = word
        
        #Create sparse Document-Term Matrix (DTM)
        self.sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
        for doc_index, document in enumerate(self.input_file_):
            document_counter = Counter(document)
            for word in set(document):
                self.sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word]    # Update element
            
        #Get word count for all documents to calculate sparse_p_ij
        self.sum_words = Counter()
        for doc in self.input_file_:
            self.sum_words.update(Counter(doc))
            
        #Create probability of word i in document j. Format: sparse matrix
        # NOTE(review): this def (and all the ones below it) is nested inside
        # __init__ — it never becomes an attribute of the class or instance.
        def create_sparse_p_ij (self):
            sparse_p_ij = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
            for j in range(self.num_documents):
                row_counts = self.sparse_dtm.getrow(j).toarray()[0]
                word_index = row_counts.nonzero()[0]
                non_zero_row_counts = row_counts[row_counts != 0]
                
                for i, count in enumerate(non_zero_row_counts):
                    word = self.index_to_word[word_index[i]]
                    prob_ij = count/self.sum_words[word]
                    sparse_p_ij[j,word_index[i]] = prob_ij
            return sparse_p_ij
        
        #Create a binary sparse dtm. Format: sparse matrix
        def create_sparse_binary_dtm(self):    
            binary_sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
            for doc_index, document in enumerate(self.input_file_):
                document_counter = dict.fromkeys(document, 1)
                for word in set(document):
                    binary_sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word]    # Update element
            return binary_sparse_dtm
    
        #2) Calculate Global Term weighting (4 methods: entropy, IDF, Probabilistic IDF, Normal)
        def calc_entropy(self):
            sparse_p_ij = self.create_sparse_p_ij()
            summed_word_probabilities = sparse_p_ij.sum(0).tolist()[0]
            return np.array([1+((word_probability * np.log2(word_probability))/np.log2(self.num_documents)) for word_probability in summed_word_probabilities])       
        
        def calc_idf(self):
            summed_words = self.sparse_dtm.sum(0).tolist()[0]
            return np.array([np.log2(self.num_documents/word_count) for word_count in summed_words])
        
        def calc_normal(self):
            summed_words = self.sparse_dtm.sum(0).tolist()[0]
            return np.array([1/(math.sqrt(word_count**2)) for word_count in summed_words])
        
        def calc_probidf (self):
            binary_sparse_dtm = self.create_sparse_binary_dtm()
            summed_binary_words_list = binary_sparse_dtm.sum(0).tolist()[0]
            return np.array([np.log2((self.num_documents - binary_word_count)/binary_word_count) for binary_word_count in summed_binary_words_list])
                
        # NOTE(review): these calls fail — the names above are locals of
        # __init__, so the instance has no attribute 'calc_entropy' etc.
        if self.word_weighing_ ==  1:
            gtw = self.calc_entropy()
        elif self.word_weighing_ == 2:
            gtw = self.calc_idf()
        elif self.word_weighing_ == 3:
            gtw = self.calc_normal()
        elif self.word_weighing_ == 4:
            gtw = self.calc_probidf()

现在,当我运行:

model = Weighing(input_file = data_list,
             word_weighing = 1)

其中 data_list 是一个由标记化单词组成的列表的列表。

我收到以下错误:

Traceback (most recent call last):

  File "<ipython-input-621-b0a9caec82d4>", line 4, in <module>
    word_weighing = 1)

  File "<ipython-input-617-6f3fdcecd170>", line 90, in __init__
    gtw = self.calc_entropy()

AttributeError: 'Weighing' object has no attribute 'calc_entropy'

我查看了其他一些类似的 SO 链接,但其中没有一个似乎适用于此。

我该怎么做才能克服这个错误?


编辑:

我已将代码更新为:

class Weighing:
    """Compute global term weights for a tokenized corpus.

    Parameters
    ----------
    input_file : list[list[str]]
        The corpus; each element is one document given as a list of tokens.
    word_weighing : int
        Global weighting scheme: 1 = entropy, 2 = IDF, 3 = "normal",
        4 = probabilistic IDF.  The result is stored in ``self.gtw`` as a
        1-D ``np.ndarray`` indexed by vocabulary index (see ``word_to_index``).
    """

    def __init__(self, input_file, word_weighing):
        self.input_file_ = input_file  # list in which each element is a list of tokens
        self.word_weighing_ = word_weighing
        self.num_documents = len(self.input_file_)

        # Set with all unique words from the corpus.
        self.vocabulary = set()
        for text in self.input_file_:
            self.vocabulary.update(text)
        self.vocabulary_size = len(self.vocabulary)

        # Bidirectional token <-> index mappings over the vocabulary.
        self.word_to_index = dict()
        self.index_to_word = dict()
        for i, word in enumerate(self.vocabulary):
            self.word_to_index[word] = i
            self.index_to_word[i] = word

        # Sparse Document-Term Matrix (DTM): rows = documents, cols = vocabulary.
        self.sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
        for doc_index, document in enumerate(self.input_file_):
            document_counter = Counter(document)
            for word in set(document):
                self.sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word]

        # BUG FIX: self.sum_words must be assigned BEFORE the weighting
        # dispatch below.  calc_entropy() -> create_sparse_p_ij() reads
        # self.sum_words; in the original code this block came after the
        # dispatch, so word_weighing == 1 raised
        # AttributeError: 'Weighing' object has no attribute 'sum_words'.
        self.sum_words = Counter()
        for doc in self.input_file_:
            self.sum_words.update(Counter(doc))

        # Dispatch on the requested global term weighting scheme.
        if self.word_weighing_ == 1:
            self.gtw = self.calc_entropy()
        elif self.word_weighing_ == 2:
            self.gtw = self.calc_idf()
        elif self.word_weighing_ == 3:
            self.gtw = self.calc_normal()
        elif self.word_weighing_ == 4:
            self.gtw = self.calc_probidf()

    def create_sparse_p_ij(self):
        """Return a sparse matrix of p_ij = count of word i in doc j divided by
        the corpus-wide count of word i (reads ``self.sum_words``)."""
        sparse_p_ij = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
        for j in range(self.num_documents):
            row_counts = self.sparse_dtm.getrow(j).toarray()[0]
            word_index = row_counts.nonzero()[0]
            non_zero_row_counts = row_counts[row_counts != 0]

            for i, count in enumerate(non_zero_row_counts):
                word = self.index_to_word[word_index[i]]
                prob_ij = count/self.sum_words[word]
                sparse_p_ij[j,word_index[i]] = prob_ij
        return sparse_p_ij

    def create_sparse_binary_dtm(self):
        """Return a binary (0/1 presence) version of the DTM as a sparse matrix."""
        binary_sparse_dtm = dok_matrix((self.num_documents, self.vocabulary_size), dtype=np.float32)
        for doc_index, document in enumerate(self.input_file_):
            document_counter = dict.fromkeys(document, 1)
            for word in set(document):
                binary_sparse_dtm[doc_index, self.word_to_index[word]] = document_counter[word]
        return binary_sparse_dtm

    # 2) Global Term weighting (4 methods: entropy, IDF, Probabilistic IDF, Normal)

    def calc_entropy(self):
        """Entropy weighting: 1 + (p * log2(p)) / log2(N) per vocabulary word,
        where p is the column sum of p_ij over documents."""
        sparse_p_ij = self.create_sparse_p_ij()
        summed_word_probabilities = sparse_p_ij.sum(0).tolist()[0]
        return np.array([1+((word_probability * np.log2(word_probability))/np.log2(self.num_documents)) for word_probability in summed_word_probabilities])

    def calc_idf(self):
        """IDF weighting: log2(N / total count of the word across the corpus)."""
        summed_words = self.sparse_dtm.sum(0).tolist()[0]
        return np.array([np.log2(self.num_documents/word_count) for word_count in summed_words])

    def calc_normal(self):
        """"Normal" weighting: 1 / sqrt(total_count ** 2) per word.
        NOTE(review): this equals 1 / total_count; the usual "normal" global
        weight is 1 / sqrt(sum of squared per-document counts) — confirm the
        intended formula.  Behavior preserved as written."""
        summed_words = self.sparse_dtm.sum(0).tolist()[0]
        return np.array([1/(math.sqrt(word_count**2)) for word_count in summed_words])

    def calc_probidf(self):
        """Probabilistic IDF: log2((N - df) / df) with df = document frequency.
        NOTE(review): a word appearing in every document gives log2(0) = -inf."""
        binary_sparse_dtm = self.create_sparse_binary_dtm()
        summed_binary_words_list = binary_sparse_dtm.sum(0).tolist()[0]
        return np.array([np.log2((self.num_documents - binary_word_count)/binary_word_count) for binary_word_count in summed_binary_words_list])

但是,我仍然得到错误:

AttributeError: 'Weighing' object has no attribute 'calc_entropy'

现在,这些方法在被调用时还不存在。我该如何更改代码,使 calc_entropy 等方法在 self.gtw 被初始化之前就已经定义好?

这似乎是一个缩进问题:您把 calc_entropy() 等方法函数定义在了 __init__() 函数内部,而不是定义在 class 级别。

应该是:

class Weighing:
    def __init__(self):
        # your init

    def calc_entropy(self):
        # your method