AttributeError: 'NoneType' object has no attribute 'text' - BeautifulShop
AttributeError: 'NoneType' object has no attribute 'text' - BeautifulShop
我有一些来自 fbref 的 scraping 信息的代码(link 数据:https://fbref.com/en/comps/9/stats/Premier-League-Stats),它运行良好,但现在我遇到了一些问题具有某些功能(我检查过现在不起作用的字段是“玩家”、“国籍”、“位置”、“小队”、“年龄”、“birth_year”)。我还检查了这些字段在网络中的名称是否与以前相同。有什么ideas/help可以解决的问题吗?
非常感谢!
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import sys, getopt
import csv
def get_tables(url):
res = requests.get(url)
## The next two lines get around the issue with comments breaking the parsing.
comm = re.compile("<!--|-->")
soup = BeautifulSoup(comm.sub("",res.text),'lxml')
all_tables = soup.findAll("tbody")
team_table = all_tables[0]
player_table = all_tables[1]
return player_table, team_table
def get_frame(features, player_table):
pre_df_player = dict()
features_wanted_player = features
rows_player = player_table.find_all('tr')
for row in rows_player:
if(row.find('th',{"scope":"row"}) != None):
for f in features_wanted_player:
cell = row.find("td",{"data-stat": f})
a = cell.text.strip().encode()
text=a.decode("utf-8")
if(text == ''):
text = '0'
if((f!='player')&(f!='nationality')&(f!='position')&(f!='squad')&(f!='age')&(f!='birth_year')):
text = float(text.replace(',',''))
if f in pre_df_player:
pre_df_player[f].append(text)
else:
pre_df_player[f] = [text]
df_player = pd.DataFrame.from_dict(pre_df_player)
return df_player
stats = ["player","nationality","position","squad","age","birth_year","games","games_starts","minutes","goals","assists","pens_made","pens_att","cards_yellow","cards_red","goals_per90","assists_per90","goals_assists_per90","goals_pens_per90","goals_assists_pens_per90","xg","npxg","xa","xg_per90","xa_per90","xg_xa_per90","npxg_per90","npxg_xa_per90"]
def frame_for_category(category,top,end,features):
url = (top + category + end)
player_table, team_table = get_tables(url)
df_player = get_frame(features, player_table)
return df_player
top='https://fbref.com/en/comps/9/'
end='/Premier-League-Stats'
df1 = frame_for_category('stats',top,end,stats)
df1
我建议用熊猫的 read_html
加载 table。在共享和导出下有一个直接 link 到这个 table --> 嵌入这个 Table.
import pandas as pd
df = pd.read_html("https://widgets.sports-reference.com/wg.fcgi?css=1&site=fb&url=%2Fen%2Fcomps%2F9%2Fstats%2FPremier-League-Stats&div=div_stats_standard", header=1)
这会输出一个数据帧列表,table 可以作为 df[0]
访问。输出 df[0].head()
:
Rk
Player
Nation
Pos
Squad
Age
Born
MP
Starts
Min
90s
Gls
Ast
G-PK
PK
PKatt
CrdY
CrdR
Gls.1
Ast.1
G+A
G-PK.1
G+A-PK
xG
npxG
xA
npxG+xA
xG.1
xA.1
xG+xA
npxG.1
npxG+xA.1
Matches
0
1
Patrick van Aanholt
nl NED
DF
Crystal Palace
30-190
1990
16
15
1324
14.7
0
1
0
0
0
1
0
0
0.07
0.07
0
0.07
1.2
1.2
0.8
2
0.08
0.05
0.13
0.08
0.13
Matches
1
2
Tammy Abraham
eng ENG
FW
Chelsea
23-156
1997
20
12
1021
11.3
6
1
6
0
0
0
0
0.53
0.09
0.62
0.53
0.62
5.6
5.6
0.9
6.5
0.49
0.08
0.57
0.49
0.57
Matches
2
3
Che Adams
eng ENG
FW
Southampton
24-237
1996
26
22
1985
22.1
5
4
5
0
0
1
0
0.23
0.18
0.41
0.23
0.41
5.5
5.5
4.3
9.9
0.25
0.2
0.45
0.25
0.45
Matches
3
4
Tosin Adarabioyo
eng ENG
DF
Fulham
23-164
1997
23
23
2070
23
0
0
0
0
0
1
0
0
0
0
0
0
1
1
0.1
1.1
0.04
0.01
0.05
0.04
0.05
Matches
4
5
Adrián
es ESP
GK
Liverpool
34-063
1987
3
3
270
3
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Matches
如果您只关注球员统计数据,请将 player_table = all_tables[1]
更改为 player_table = all_tables[2]
,因为现在您正在将球队 table 提供给 get_frame
函数。
我试过了,之后效果很好。
我有一些来自 fbref 的 scraping 信息的代码(link 数据:https://fbref.com/en/comps/9/stats/Premier-League-Stats),它运行良好,但现在我遇到了一些问题具有某些功能(我检查过现在不起作用的字段是“玩家”、“国籍”、“位置”、“小队”、“年龄”、“birth_year”)。我还检查了这些字段在网络中的名称是否与以前相同。有什么ideas/help可以解决的问题吗?
非常感谢!
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import sys, getopt
import csv
def get_tables(url):
res = requests.get(url)
## The next two lines get around the issue with comments breaking the parsing.
comm = re.compile("<!--|-->")
soup = BeautifulSoup(comm.sub("",res.text),'lxml')
all_tables = soup.findAll("tbody")
team_table = all_tables[0]
player_table = all_tables[1]
return player_table, team_table
def get_frame(features, player_table):
pre_df_player = dict()
features_wanted_player = features
rows_player = player_table.find_all('tr')
for row in rows_player:
if(row.find('th',{"scope":"row"}) != None):
for f in features_wanted_player:
cell = row.find("td",{"data-stat": f})
a = cell.text.strip().encode()
text=a.decode("utf-8")
if(text == ''):
text = '0'
if((f!='player')&(f!='nationality')&(f!='position')&(f!='squad')&(f!='age')&(f!='birth_year')):
text = float(text.replace(',',''))
if f in pre_df_player:
pre_df_player[f].append(text)
else:
pre_df_player[f] = [text]
df_player = pd.DataFrame.from_dict(pre_df_player)
return df_player
stats = ["player","nationality","position","squad","age","birth_year","games","games_starts","minutes","goals","assists","pens_made","pens_att","cards_yellow","cards_red","goals_per90","assists_per90","goals_assists_per90","goals_pens_per90","goals_assists_pens_per90","xg","npxg","xa","xg_per90","xa_per90","xg_xa_per90","npxg_per90","npxg_xa_per90"]
def frame_for_category(category,top,end,features):
url = (top + category + end)
player_table, team_table = get_tables(url)
df_player = get_frame(features, player_table)
return df_player
top='https://fbref.com/en/comps/9/'
end='/Premier-League-Stats'
df1 = frame_for_category('stats',top,end,stats)
df1
我建议用熊猫的 read_html
加载 table。在共享和导出下有一个直接 link 到这个 table --> 嵌入这个 Table.
import pandas as pd
df = pd.read_html("https://widgets.sports-reference.com/wg.fcgi?css=1&site=fb&url=%2Fen%2Fcomps%2F9%2Fstats%2FPremier-League-Stats&div=div_stats_standard", header=1)
这会输出一个数据帧列表,table 可以作为 df[0]
访问。输出 df[0].head()
:
Rk | Player | Nation | Pos | Squad | Age | Born | MP | Starts | Min | 90s | Gls | Ast | G-PK | PK | PKatt | CrdY | CrdR | Gls.1 | Ast.1 | G+A | G-PK.1 | G+A-PK | xG | npxG | xA | npxG+xA | xG.1 | xA.1 | xG+xA | npxG.1 | npxG+xA.1 | Matches | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Patrick van Aanholt | nl NED | DF | Crystal Palace | 30-190 | 1990 | 16 | 15 | 1324 | 14.7 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0.07 | 0.07 | 0 | 0.07 | 1.2 | 1.2 | 0.8 | 2 | 0.08 | 0.05 | 0.13 | 0.08 | 0.13 | Matches |
1 | 2 | Tammy Abraham | eng ENG | FW | Chelsea | 23-156 | 1997 | 20 | 12 | 1021 | 11.3 | 6 | 1 | 6 | 0 | 0 | 0 | 0 | 0.53 | 0.09 | 0.62 | 0.53 | 0.62 | 5.6 | 5.6 | 0.9 | 6.5 | 0.49 | 0.08 | 0.57 | 0.49 | 0.57 | Matches |
2 | 3 | Che Adams | eng ENG | FW | Southampton | 24-237 | 1996 | 26 | 22 | 1985 | 22.1 | 5 | 4 | 5 | 0 | 0 | 1 | 0 | 0.23 | 0.18 | 0.41 | 0.23 | 0.41 | 5.5 | 5.5 | 4.3 | 9.9 | 0.25 | 0.2 | 0.45 | 0.25 | 0.45 | Matches |
3 | 4 | Tosin Adarabioyo | eng ENG | DF | Fulham | 23-164 | 1997 | 23 | 23 | 2070 | 23 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0.1 | 1.1 | 0.04 | 0.01 | 0.05 | 0.04 | 0.05 | Matches |
4 | 5 | Adrián | es ESP | GK | Liverpool | 34-063 | 1987 | 3 | 3 | 270 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Matches |
如果您只关注球员统计数据,请将 player_table = all_tables[1]
更改为 player_table = all_tables[2]
,因为现在您正在将球队 table 提供给 get_frame
函数。
我试过了,之后效果很好。