为什么这个 find 方法返回 None 并抛出 AttributeError？
Why is this `find` method returning None and then throwing an AttributeError?
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
def tableData(data, attrs=None):
    """Extract the rows of a BeautifulSoup table tag as a list of lists.

    Args:
        data: a bs4 Tag — typically the ``<table>`` element itself.
        attrs: optional attribute dict.  If given, try to locate a nested
            element matching it; when no nested match exists (the common
            case where ``data`` is already the element found with these
            attrs), fall back to ``data`` itself.

    Returns:
        A list of rows: the ``<th>`` header texts first (when present),
        followed by the ``<td>`` texts of each ``<tr>`` after the first.
    """
    rows = []
    if attrs is not None:
        # Tag.find() searches only DESCENDANTS.  When `data` is already
        # the table matched by `attrs`, find() returns None and the old
        # code then crashed with AttributeError on None.find_all().
        # Fall back to `data` itself when there is no nested match.
        inner = data.find(attrs=attrs)
        if inner is not None:
            data = inner
    all_tr = data.find_all('tr')
    header = [th.get_text(strip=True) for th in data.find_all('th')]
    if header:
        rows.append(header)
    # Skip the first <tr>: it holds the header cells, not data.
    for tr in all_tr[1:]:
        rows.append([td.get_text(strip=True) for td in tr.find_all('td')])
    return rows
# Download the 2019 regular-season NFL standings page and print the parsed table.
url1 = 'https://www.nfl.com/standings/league/2019/REG'
page1 = requests.get(url1)
soup1 = BeautifulSoup(page1.text, 'lxml')
# Attribute filter used both to locate the table and (redundantly) inside tableData.
table_attrs = {'summary': 'Standings - Detailed View'}
table = soup1.find('table', attrs=table_attrs)
# print(table)
print(tableData(table, table_attrs))
Even in the debugger I can see that `data` contains the "Standings - Detailed View" table,
but when `data.find(attrs=attrs)` runs, it seems to return None.

You already have the table in `table`. It has all the rows and everything, but it contains no
*further* descendant with the "Standings - Detailed View" attribute, so `find()` returns None.
Go straight to the rows instead:
def tableData(data):  #, attrs):  — the attrs parameter is no longer needed
    # Answer's excerpt (intentionally partial): operate on the table tag
    # directly; the rest of the original body stays the same.
    row = []
    # data = data.find(attrs=attrs)  — removed: this returned None (see above)
    tr = data.find_all('tr')
Better yet, use pandas (it is imported anyway) to extract the table straight into a DataFrame:
df = pd.read_html('https://www.nfl.com/standings/league/2019/REG')[0]
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
def tableData(data, attrs=None):
    """Extract the rows of a BeautifulSoup table tag as a list of lists.

    Args:
        data: a bs4 Tag — typically the ``<table>`` element itself.
        attrs: optional attribute dict.  If given, try to locate a nested
            element matching it; when no nested match exists (the common
            case where ``data`` is already the element found with these
            attrs), fall back to ``data`` itself.

    Returns:
        A list of rows: the ``<th>`` header texts first (when present),
        followed by the ``<td>`` texts of each ``<tr>`` after the first.
    """
    rows = []
    if attrs is not None:
        # Tag.find() searches only DESCENDANTS.  When `data` is already
        # the table matched by `attrs`, find() returns None and the old
        # code then crashed with AttributeError on None.find_all().
        # Fall back to `data` itself when there is no nested match.
        inner = data.find(attrs=attrs)
        if inner is not None:
            data = inner
    all_tr = data.find_all('tr')
    header = [th.get_text(strip=True) for th in data.find_all('th')]
    if header:
        rows.append(header)
    # Skip the first <tr>: it holds the header cells, not data.
    for tr in all_tr[1:]:
        rows.append([td.get_text(strip=True) for td in tr.find_all('td')])
    return rows
# Download the 2019 regular-season NFL standings page and print the parsed table.
url1 = 'https://www.nfl.com/standings/league/2019/REG'
page1 = requests.get(url1)
soup1 = BeautifulSoup(page1.text, 'lxml')
# Attribute filter used both to locate the table and (redundantly) inside tableData.
table_attrs = {'summary': 'Standings - Detailed View'}
table = soup1.find('table', attrs=table_attrs)
# print(table)
print(tableData(table, table_attrs))
Even in the debugger I can see that `data` contains the "Standings - Detailed View" table,
but when `data.find(attrs=attrs)` runs, it seems to return None.

You already have the table in `table`. It has all the rows and everything, but it contains no
*further* descendant with the "Standings - Detailed View" attribute, so `find()` returns None.
Go straight to the rows instead:
def tableData(data):  #, attrs):  — the attrs parameter is no longer needed
    # Answer's excerpt (intentionally partial): operate on the table tag
    # directly; the rest of the original body stays the same.
    row = []
    # data = data.find(attrs=attrs)  — removed: this returned None (see above)
    tr = data.find_all('tr')
Better yet, use pandas (it is imported anyway) to extract the table straight into a DataFrame:
df = pd.read_html('https://www.nfl.com/standings/league/2019/REG')[0]