AttributeError: 'AbbyExtractor' object has no attribute 'soup'
AttributeError: 'AbbyExtractor' object has no attribute 'soup'
当我调用以下函数时
# Collect the per-block word results from the document held in self.soup.
# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the nesting described in the comments below is an assumption.
def get_words(self):
# Reads self.soup, which this method never assigns; it must already exist on
# the instance, otherwise this line raises AttributeError.
# Selects every "block" element whose blockType is not 'Separator' or
# 'SeparatorsBox'.
blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})
wrds_blcks = []
# The index i is not used in the lines shown here.
for i, block in enumerate(blocks):
# Table blocks and all other blocks are handled by different helpers.
if block['blockType'] == 'Table':
rslt = self._get_words_from_block_table(block)
else:
rslt = self._get_words_from_block_text(block)
# NOTE(review): unclear whether this cleanup belongs to the else branch only
# or runs for both branches -- confirm against the original file.
rslt = self._cleanup_word(rslt)
# Results equal to [] or [[]] are skipped.
if rslt != [[]] and rslt != []:
wrds_blcks.append(rslt)
return wrds_blcks
我得到以下错误
in get_words
blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})
AttributeError: 'AbbyExtractor' object has no attribute 'soup'
参考第一行:
blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})
怎么了?
您需要先创建 soup 对象:把从网页获取到的 html 代码作为参数传给 get_words 方法,在方法内部用它构建 soup,然后再执行后续处理。
def get_words(self, html):
    """Parse *html* into ``self.soup`` and return the words of each block.

    Args:
        html: Markup of the document to parse. It is parsed as XML so that
            the mixed-case ``blockType`` attribute keeps its spelling.

    Returns:
        A list with one entry per ``block`` element whose ``blockType`` is
        not ``'Separator'`` or ``'SeparatorsBox'`` and whose extracted
        result is neither ``[]`` nor ``[[]]``.
    """
    # HTML parsers (including "lxml") lower-case attribute names, which would
    # turn ``blockType`` into ``blocktype`` and make ``block['blockType']``
    # raise KeyError. The "xml" parser (also backed by lxml) preserves case.
    self.soup = BeautifulSoup(html, "xml")
    blocks = self.soup.find_all(
        "block",
        {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')},
    )
    wrds_blcks = []
    for block in blocks:
        # Table blocks and all other blocks are handled by different helpers.
        if block['blockType'] == 'Table':
            rslt = self._get_words_from_block_table(block)
        else:
            rslt = self._get_words_from_block_text(block)
        # NOTE(review): the original paste had lost its indentation; the
        # cleanup is assumed to run for both branches -- confirm.
        rslt = self._cleanup_word(rslt)
        # Skip blocks that produced no words.
        if rslt != [[]] and rslt != []:
            wrds_blcks.append(rslt)
    return wrds_blcks
当我调用以下函数时
# Collect the per-block word results from the document held in self.soup.
# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the nesting described in the comments below is an assumption.
def get_words(self):
# Reads self.soup, which this method never assigns; it must already exist on
# the instance, otherwise this line raises AttributeError.
# Selects every "block" element whose blockType is not 'Separator' or
# 'SeparatorsBox'.
blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})
wrds_blcks = []
# The index i is not used in the lines shown here.
for i, block in enumerate(blocks):
# Table blocks and all other blocks are handled by different helpers.
if block['blockType'] == 'Table':
rslt = self._get_words_from_block_table(block)
else:
rslt = self._get_words_from_block_text(block)
# NOTE(review): unclear whether this cleanup belongs to the else branch only
# or runs for both branches -- confirm against the original file.
rslt = self._cleanup_word(rslt)
# Results equal to [] or [[]] are skipped.
if rslt != [[]] and rslt != []:
wrds_blcks.append(rslt)
return wrds_blcks
我得到以下错误
in get_words
blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})
AttributeError: 'AbbyExtractor' object has no attribute 'soup'
参考第一行:
blocks = self.soup.find_all("block", {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')})
怎么了?
您需要先创建 soup 对象:把从网页获取到的 html 代码作为参数传给 get_words 方法,在方法内部用它构建 soup,然后再执行后续处理。
def get_words(self, html):
    """Parse *html* into ``self.soup`` and return the words of each block.

    Args:
        html: Markup of the document to parse. It is parsed as XML so that
            the mixed-case ``blockType`` attribute keeps its spelling.

    Returns:
        A list with one entry per ``block`` element whose ``blockType`` is
        not ``'Separator'`` or ``'SeparatorsBox'`` and whose extracted
        result is neither ``[]`` nor ``[[]]``.
    """
    # HTML parsers (including "lxml") lower-case attribute names, which would
    # turn ``blockType`` into ``blocktype`` and make ``block['blockType']``
    # raise KeyError. The "xml" parser (also backed by lxml) preserves case.
    self.soup = BeautifulSoup(html, "xml")
    blocks = self.soup.find_all(
        "block",
        {"blockType": lambda x: x not in ('Separator', 'SeparatorsBox')},
    )
    wrds_blcks = []
    for block in blocks:
        # Table blocks and all other blocks are handled by different helpers.
        if block['blockType'] == 'Table':
            rslt = self._get_words_from_block_table(block)
        else:
            rslt = self._get_words_from_block_text(block)
        # NOTE(review): the original paste had lost its indentation; the
        # cleanup is assumed to run for both branches -- confirm.
        rslt = self._cleanup_word(rslt)
        # Skip blocks that produced no words.
        if rslt != [[]] and rslt != []:
            wrds_blcks.append(rslt)
    return wrds_blcks