'NoneType' object has no attribute 'find_all' error

Please help me fix my code.

relation_tables = char_soup.find('ul', class_='subNav').find_all('li')
like_page_url = url + relation_tables[2].find('a').get('href')  # get like page's URL
dislike_page_url = url + relation_tables[3].find('a').get('href')  # get dislike page's URL
like_r = requests.get(like_page_url)  # get source of the pages with users who liked/disliked
dislike_r = requests.get(dislike_page_url)
like_soup = BeautifulSoup(like_r.text, 'html.parser')
dislike_soup = BeautifulSoup(dislike_r.text, 'html.parser')
like_pages = int(like_soup.find('ul', class_='nav').find_all('li')[13].text)
dislike_pages = int(dislike_soup.find('ul', class_='nav').find_all('li')[13].text)
n = like_soup.find('table', class_='pure-table striped').find_all('tr')  # WORKS
for i in range(0, like_pages):
    like_users_trs = like_soup.find('table', class_='pure-table striped').find_all('tr')  # DOESN'T
    curr_character_like_names.extend([f'{url}{tr.find("a").text}' for tr in like_users_trs])  # get all user names and extend them into a list
    like_page_url = url + like_soup.find('li', class_='next').find('a').get('href')  # then find the 'next' button and get the next page's URL
    like_r = requests.get(like_page_url)
    like_soup = BeautifulSoup(like_r.text, 'html.parser')  # get source of the next page

This code is supposed to collect lists of usernames from the pages of users who like and dislike a character (two different pages). The problem is that one of two lines that do exactly the same thing doesn't work: n = like_soup.find('table', class_='pure-table striped').find_all('tr') (that line is only there for testing) sits outside the loop and works fine, but the identical line inside the loop (like_users_trs = like_soup.find('table', class_='pure-table striped').find_all('tr')) throws an error:

Traceback (most recent call last):
  File "/home/sekki/Documents/Pycharm/anime_planetDB/main.py", line 131, in <module>
    like_users_trs = like_soup.find('table', class_='pure-table striped').find_all('tr')  # DOESN'T
AttributeError: 'NoneType' object has no attribute 'find_all'
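
BeautifulSoup's find() returns None whenever nothing matches, so the error means the page parsed inside the loop contains no table with class 'pure-table striped' at all. A likely cause is that the follow-up request was served a blocked or error page rather than the user list (note that the answer below sends a browser User-Agent header). A minimal guard, reusing the like_soup, like_r and like_page_url variables from the snippet above, makes the real failure visible:

like_table = like_soup.find('table', class_='pure-table striped')
if like_table is None:
    # No user table on this page: show what the server actually sent back
    raise RuntimeError(f'No user table at {like_page_url}; response starts with: {like_r.text[:300]!r}')
like_users_trs = like_table.find_all('tr')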

Additional information:

It looks like you're overcomplicating this a bit. Looking at the pattern, once you page past the last page the names start repeating, so just keep looping until that happens.

Second, let pandas parse the table for you:

import pandas as pd
import requests
from io import StringIO


def get_date(url):
    df = pd.DataFrame(columns=[0])
    page = 1
    while True:
        url_page = f'{url}?page={page}'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'}
        response = requests.get(url_page, headers=headers).text
        # read_html needs an HTML parser such as lxml installed; wrap the
        # raw HTML in StringIO, since passing a bare string is deprecated
        temp_df = pd.read_html(StringIO(response))[0]

        # past the last page the site serves repeated names, so stop as
        # soon as the first name on the new page has been seen already
        if list(temp_df[0])[0] in list(df[0]):
            break
        print(f'Collected Page: {page}')
        df = pd.concat([df, temp_df])  # DataFrame.append was removed in pandas 2.0
        page += 1

    return df


dfLoves = get_date('https://www.anime-planet.com/characters/armin-arlelt/loves')
dfHates = get_date('https://www.anime-planet.com/characters/armin-arlelt/hates')

Output:

print(dfLoves)
                  0
0         atsumuboo
1         Ken0brien
2           Kabooom
3          xsleepyn
4    camoteconpapas
..              ...
21        SonSoneca
22  SayaSpringfield
23          Kurasan
24     HikaruTenshi
0     silvertail123

[15026 rows x 1 columns]

print(dfHates)
                 0
0           selvnq
1   LiveLaughLuffy
2      SixxTheGoat
3        IceWolfTO
4         Sam234io
..             ...
11     phoenix5793
12          Tyrano
13      SimplyTosh
14      KrystaChan
15     SHADOWORZA0

[2591 rows x 1 columns]
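
Since the original goal was a list of usernames rather than a DataFrame, the single column can be turned into a plain Python list once the scrape finishes. A small usage sketch with the dfLoves frame returned above:

like_names = dfLoves[0].tolist()  # column 0 holds the usernames
print(len(like_names), like_names[:5])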