'NoneType' object has no attribute 'find_all' error
Please help me with my code:
relation_tables = char_soup.find('ul', class_='subNav').find_all('li')
like_page_url = url + relation_tables[2].find('a').get('href')      # Get like page's url
dislike_page_url = url + relation_tables[3].find('a').get('href')   # Get dislike page's url
like_r = requests.get(like_page_url)  # Get source of the pages with users who liked/disliked
dislike_r = requests.get(dislike_page_url)
like_soup = BeautifulSoup(like_r.text, 'html.parser')
dislike_soup = BeautifulSoup(dislike_r.text, 'html.parser')
like_pages = int(like_soup.find('ul', class_='nav').find_all('li')[13].text)
dislike_pages = int(dislike_soup.find('ul', class_='nav').find_all('li')[13].text)
n = like_soup.find('table', class_='pure-table striped').find_all('tr')  # WORKS
for i in range(0, like_pages):
    like_users_trs = like_soup.find('table', class_='pure-table striped').find_all('tr')  # DOESN'T
    curr_character_like_names.extend([f'{url}{tr.find("a").text}' for tr in like_users_trs])  # Get all user names and extend them onto a list
    like_page_url = url + like_soup.find('li', class_='next').find('a').get('href')  # Then find the 'next' button and get the next page's url
    like_r = requests.get(like_page_url)
    like_soup = BeautifulSoup(like_r.text, 'html.parser')  # Get source of the next page
This code is supposed to get a list of usernames from the pages listing users who love and who hate a character (two different pages). The problem is that one of two lines doing the same thing doesn't work:
n = like_soup.find('table', class_='pure-table striped').find_all('tr')
(that line is only there for testing)
The one outside the loop works fine, but the identical line inside the loop (like_users_trs = like_soup.find('table', class_='pure-table striped').find_all('tr')) throws an error:
Traceback (most recent call last):
File "/home/sekki/Documents/Pycharm/anime_planetDB/main.py", line 131, in <module>
like_users_trs = like_soup.find('table', class_='pure-table striped').find_all('tr')  # DOESN'T
AttributeError: 'NoneType' object has no attribute 'find_all'
Additional info:
- like_page_url = https://www.anime-planet.com/characters/armin-arlelt/loves
- dislike_page_url = https://www.anime-planet.com/characters/armin-arlelt/hates
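The traceback means that find('table', class_='pure-table striped') returned None for one of the pages fetched inside the loop, i.e. that response contained no such table. Below is a minimal diagnostic sketch, assuming the URL from the additional info above; whether the table is missing because of a block, a redirect, or a bad 'next' href is exactly what it would reveal:

import requests
from bs4 import BeautifulSoup

# One of the URLs from the additional info; the same guard applies to
# whatever URL the loop is about to parse.
like_page_url = 'https://www.anime-planet.com/characters/armin-arlelt/loves'

like_r = requests.get(like_page_url)
like_soup = BeautifulSoup(like_r.text, 'html.parser')

table = like_soup.find('table', class_='pure-table striped')
if table is None:
    # find() returned None, so calling .find_all('tr') on it would raise
    # the AttributeError above. Inspect the response instead of crashing.
    print('No table at', like_page_url, '- HTTP status:', like_r.status_code)
    print(like_r.text[:500])  # peek at what the server actually sent
else:
    like_users_trs = table.find_all('tr')
    print('Found', len(like_users_trs), 'rows')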
It looks like you're overcomplicating this a little. Looking at the pattern, once you page past the last page the names start repeating, so just run a while True loop until that happens. Second, let pandas parse the table for you:
import pandas as pd
import requests
from io import StringIO

def get_date(url):
    df = pd.DataFrame(columns=[0])
    page = 1
    continueLoop = True
    while continueLoop:
        url_page = f'{url}?page={page}'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'}
        response = requests.get(url_page, headers=headers).text
        temp_df = pd.read_html(StringIO(response))[0]  # newer pandas wants file-like input, not a raw HTML string
        if list(temp_df[0])[0] not in list(df[0]):  # first name on this page not collected yet -> keep going
            print(f'Collected Page: {page}')
            df = pd.concat([df, temp_df])  # DataFrame.append was removed in pandas 2.x
            page += 1
        else:
            continueLoop = False
    return df
dfLoves = get_date('https://www.anime-planet.com/characters/armin-arlelt/loves')
dfHates = get_date('https://www.anime-planet.com/characters/armin-arlelt/hates')
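Two notes on this sketch: the User-Agent header is there because the site apparently serves different (or blocked) content to the default python-requests agent, which is also a plausible reason a page fetched inside your original loop came back without the table; and the stopping condition compares the first username on the freshly fetched page against the names already collected, relying on the observation above that paging past the end repeats earlier content.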
Output:
print(dfLoves)
0
0 atsumuboo
1 Ken0brien
2 Kabooom
3 xsleepyn
4 camoteconpapas
.. ...
21 SonSoneca
22 SayaSpringfield
23 Kurasan
24 HikaruTenshi
0 silvertail123
[15026 rows x 1 columns]
print(dfHates)
0
0 selvnq
1 LiveLaughLuffy
2 SixxTheGoat
3 IceWolfTO
4 Sam234io
.. ...
11 phoenix5793
12 Tyrano
13 SimplyTosh
14 KrystaChan
15 SHADOWORZA0
[2591 rows x 1 columns]
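If the end goal is still the list your original code was building (curr_character_like_names), the DataFrames convert straight to it. A small usage sketch; base_url here is a hypothetical stand-in for the url variable in your code, and the f-string mirrors the one in your original loop:

# Assumes dfLoves and dfHates from get_date() above.
base_url = 'https://www.anime-planet.com/users/'  # hypothetical stand-in for the question's `url`

like_names = dfLoves[0].tolist()
dislike_names = dfHates[0].tolist()

# Mirrors f'{url}{tr.find("a").text}' from the original loop:
curr_character_like_names = [f'{base_url}{name}' for name in like_names]
curr_character_dislike_names = [f'{base_url}{name}' for name in dislike_names]

print(len(curr_character_like_names), curr_character_like_names[:3])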