AttributeError: ' ' object has no attribute 'soup'

AttributeError: ' ' object has no attribute 'soup'

当我调用以下函数时

def get_words(self):
    """Collect the cleaned-up words of every non-separator block.

    Every ``block`` element returned by ``self.soup.find_all`` whose
    ``blockType`` is neither ``'Separator'`` nor ``'SeparatorsBox'`` is
    handed to ``self._get_words_from_block_table`` (when its type is
    ``'Table'``) or to ``self._get_words_from_block_text`` (any other type);
    the extracted value is then passed through ``self._cleanup_word``.

    Returns:
        A list holding one cleaned result per block, in the order
        ``find_all`` returned the blocks, leaving out results equal to
        ``[]`` or ``[[]]``.

    Raises:
        AttributeError: If ``self.soup`` has not been assigned before the
            call.
    """
    blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})

    collected = []
    for block in blocks:
        # Pick the extractor that matches the block type, then normalise
        # whatever it returned.
        if block['blockType'] == 'Table':
            extract = self._get_words_from_block_table
        else:
            extract = self._get_words_from_block_text
        cleaned = self._cleanup_word(extract(block))

        # Keep only results that actually contain something.
        if cleaned != [[]] and cleaned != []:
            collected.append(cleaned)

    return collected

我得到以下错误

 in get_words
    blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})
AttributeError: 'AbbyExtractor' object has no attribute 'soup'

参考第一行:

 blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})

怎么了?

您需要先制作 soup。将从网页检索到的 html 代码作为参数传递给 get_words 方法。并制作soup。然后做你的任务。

def get_words(self, html):
    """Parse *html* into ``self.soup`` and return the cleaned words per block.

    The markup is parsed with Beautiful Soup and stored on ``self.soup``.
    Every ``block`` element whose ``blockType`` is neither ``'Separator'``
    nor ``'SeparatorsBox'`` is then handed to
    ``self._get_words_from_block_table`` (type ``'Table'``) or to
    ``self._get_words_from_block_text`` (any other type), and the extracted
    value is passed through ``self._cleanup_word``.

    Args:
        html: The markup to parse. It is parsed as XML so that the
            mixed-case ``blockType`` attribute keeps its spelling.

    Returns:
        A list holding one cleaned result per block, in the order
        ``find_all`` returned the blocks, leaving out results equal to
        ``[]`` or ``[[]]``.
    """
    # Beautiful Soup's HTML parsers (including "lxml") lower-case attribute
    # names, so ``blockType`` would be stored as ``blocktype``: the filter
    # below would then be called with None for every block (letting
    # separators through) and ``block['blockType']`` would raise KeyError.
    # The XML parser preserves attribute case.
    self.soup = BeautifulSoup(html, "lxml-xml")
    blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})

    wrds_blcks = []

    for block in blocks:
        if block['blockType'] == 'Table':
            rslt = self._get_words_from_block_table(block)
        else:
            rslt = self._get_words_from_block_text(block)

        rslt = self._cleanup_word(rslt)
        # Keep only results that actually contain something.
        if rslt != [[]] and rslt != []:
            wrds_blcks.append(rslt)

    return wrds_blcks