Using BeautifulSoup to Scrape ESPN Fantasy Football
I have seen plenty of examples of scraping ESPN fantasy football leagues, and while I am very new to web scraping, I researched this extensively before posting. I cannot access my league and get any useful information out of it. I believe you are supposed to pass cookies with the request to identify yourself for access to a private league.
import requests
from bs4 import BeautifulSoup

page = requests.get('https://fantasy.espn.com/football/league?leagueId=########',
                    cookies={'SWID': '#######', 'espn_s2': '#######'}
                    )
soup = BeautifulSoup(page.text, 'html.parser')
test = soup.find_all(class_='team-scores')
print(len(test))
print(type(test))
print(test)
0
<class 'bs4.element.ResultSet'>
[]
Although cookies seem to matter according to this article https://stmorse.github.io/journal/espn-fantasy-python.html and some of the posts it references, performing the request without cookies gives the same result. I compared the soup with and without cookies and they were identical.
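A minimal sketch of that comparison (placeholders kept for the cookie values and league ID). The empty find_all above suggests the raw HTML is a skeleton that is filled in by JavaScript in the browser, which would explain why the cookies make no difference here:

import requests

url = 'https://fantasy.espn.com/football/league?leagueId=########'
# Fetch the league page with and without the auth cookies
with_cookies = requests.get(url, cookies={'SWID': '#######', 'espn_s2': '#######'}).text
without_cookies = requests.get(url).text
print(with_cookies == without_cookies)
# Check whether the target class ever appears in the raw markup
print('team-scores' in with_cookies)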
I know there is an API for ESPN, but I haven't managed to get any code working with it. I want to scrape the team names, then pull each team's weekly results and run every possible schedule for each team to get a distribution of outcomes, to see how lucky or unlucky each team in my league has been. I'm also curious about doing the same with Yahoo. At this point I could easily pull the data by hand since there isn't much of it, but I'd like a more general solution.
Any advice or help for an inexperienced web scraper would be greatly appreciated.
You would have to share your league ID for me to test against it, but here is some code that does some of that data manipulation for a league. Basically, you get the data back as JSON and then need to parse it to work out wins/losses from the weekly points. You can then sort it into a final table that compares regular-season records to overall records, showing which teams performed above/below expectation given their schedule:
import requests
import pandas as pd

# An anonymous session to espn.com is enough to pick up a SWID cookie for a
# public league; a private league would also need your espn_s2 cookie.
s = requests.Session()
s.get('https://www.espn.com')
swid = s.cookies.get_dict()['SWID']

league_id = 31181
url = 'https://fantasy.espn.com/apis/v3/games/ffl/seasons/2019/segments/0/leagues/%s' % league_id
r = requests.get(url, cookies={"swid": swid}).json()

# Map team IDs to team names
teamId = {}
for team in r['teams']:
    teamId[team['id']] = team['location'].strip() + ' ' + team['nickname'].strip()

# Get each team's weekly points and calculate their head-to-head records
r = requests.get(url, cookies={"swid": swid}, params={"view": "mMatchup"}).json()
frames = []
for each in r['schedule']:
    week = each['matchupPeriodId']
    if week >= 14:  # only count the regular season (weeks 1-13)
        continue
    homeTm = teamId[each['home']['teamId']]
    homeTmPts = each['home']['totalPoints']
    try:
        awayTm = teamId[each['away']['teamId']]
        awayTmPts = each['away']['totalPoints']
    except KeyError:  # bye week: no away team in the matchup
        continue
    if homeTmPts > awayTmPts:
        home_wlt, away_wlt = (1, 0, 0), (0, 1, 0)  # (win, loss, tie)
    elif homeTmPts < awayTmPts:
        home_wlt, away_wlt = (0, 1, 0), (1, 0, 0)
    else:
        home_wlt, away_wlt = (0, 0, 1), (0, 0, 1)
    frames.append(pd.DataFrame([
        {'team': homeTm, 'pts': homeTmPts, 'week': week,
         'win': home_wlt[0], 'loss': home_wlt[1], 'tie': home_wlt[2]},
        {'team': awayTm, 'pts': awayTmPts, 'week': week,
         'win': away_wlt[0], 'loss': away_wlt[1], 'tie': away_wlt[2]},
    ]))
weeklyPts = pd.concat(frames).reset_index(drop=True)
weeklyPts['win'] = weeklyPts.groupby(['team'])['win'].cumsum()
weeklyPts['loss'] = weeklyPts.groupby(['team'])['loss'].cumsum()
weeklyPts['tie'] = weeklyPts.groupby(['team'])['tie'].cumsum()

# Calculate each team's record compared to all other teams' points week to week
cumWeeklyRecord = {}
for week in weeklyPts[weeklyPts['pts'] > 0]['week'].unique():
    df = weeklyPts[weeklyPts['week'] == week]
    cumWeeklyRecord[week] = {}
    for idx, row in df.iterrows():
        team = row['team']
        pts = row['pts']
        cumWeeklyRecord[week][team] = {
            'win': len(df[df['pts'] < pts]),
            'loss': len(df[df['pts'] > pts]),
            'tie': len(df[df['pts'] == pts]) - 1,  # subtract the team's tie with itself
        }

# Combine those cumulative weekly records into an overall season record
overallRecord = {}
for week, records in cumWeeklyRecord.items():
    for team, rec in records.items():
        if team not in overallRecord:
            overallRecord[team] = {'win': 0, 'loss': 0, 'tie': 0}
        overallRecord[team]['win'] += rec['win']
        overallRecord[team]['loss'] += rec['loss']
        overallRecord[team]['tie'] += rec['tie']

# A little cleanup of the data and calculating win %
overallRecord_df = pd.DataFrame(overallRecord).T
overallRecord_df = overallRecord_df.rename_axis('team').reset_index()
overallRecord_df = overallRecord_df.rename(columns={'win': 'overall_win', 'loss': 'overall_loss', 'tie': 'overall_tie'})
overallRecord_df['overall_win%'] = overallRecord_df['overall_win'] / (overallRecord_df['overall_win'] + overallRecord_df['overall_loss'] + overallRecord_df['overall_tie'])
overallRecord_df['overall_rank'] = overallRecord_df['overall_win%'].rank(ascending=False, method='min')

regularSeasRecord = weeklyPts[weeklyPts['week'] == 13][['team', 'win', 'loss', 'tie']].copy()
regularSeasRecord['win%'] = regularSeasRecord['win'] / (regularSeasRecord['win'] + regularSeasRecord['loss'] + regularSeasRecord['tie'])
regularSeasRecord['rank'] = regularSeasRecord['win%'].rank(ascending=False, method='min')
final_df = overallRecord_df.merge(regularSeasRecord, how='left', on=['team'])
Output:
print(final_df.sort_values('rank').to_string())
team overall_loss overall_tie overall_win overall_win% overall_rank win loss tie win% rank
0 Luck Dynasty 39 0 104 0.727273 1.0 12.0 1.0 0.0 0.923077 1.0
10 Warsaw Widow Makers 48 0 95 0.664336 3.0 10.0 3.0 0.0 0.769231 2.0
2 Team Powell 60 0 83 0.580420 5.0 8.0 5.0 0.0 0.615385 3.0
1 Team White 46 0 97 0.678322 2.0 7.0 6.0 0.0 0.538462 4.0
3 The SouthWest Slingers 55 0 88 0.615385 4.0 7.0 6.0 0.0 0.538462 4.0
5 U MAD BRO? 71 0 72 0.503497 6.0 7.0 6.0 0.0 0.538462 4.0
11 Team Troxell 88 0 55 0.384615 9.0 7.0 6.0 0.0 0.538462 4.0
6 Organized Chaos 72 0 71 0.496503 7.0 6.0 7.0 0.0 0.461538 8.0
7 Jobobes Jabronis 88 0 55 0.384615 9.0 6.0 7.0 0.0 0.461538 8.0
4 Killa Bees!! 98 0 45 0.314685 11.0 4.0 9.0 0.0 0.307692 10.0
9 Faceless Men 86 0 57 0.398601 8.0 3.0 10.0 0.0 0.230769 11.0
8 Rollin with Mahomies 107 0 36 0.251748 12.0 1.0 12.0 0.0 0.076923 12.0
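As for the original goal of running alternative schedules to see how lucky each team has been, one option is to re-pair the matchups at random week by week and count wins over many simulated seasons. A minimal, self-contained sketch using made-up weekly scores (in practice you would build the scores dict from the weeklyPts frame above):

import random

# Hypothetical example data: each team's points by week (made-up numbers);
# in practice, build this from the weeklyPts frame constructed above.
scores = {
    'Team A': [110.2, 95.6, 102.4],
    'Team B': [88.0, 120.1, 99.3],
    'Team C': [101.5, 97.2, 111.8],
    'Team D': [92.3, 105.0, 89.9],
}

def simulate_seasons(scores, n_sims=10000, seed=0):
    """Randomly re-pair the teams each week and count head-to-head wins;
    returns {team: list of win totals, one per simulated season}."""
    rng = random.Random(seed)
    teams = list(scores)
    n_weeks = len(next(iter(scores.values())))
    wins = {t: [] for t in teams}
    for _ in range(n_sims):
        totals = dict.fromkeys(teams, 0)
        for week in range(n_weeks):
            rng.shuffle(teams)
            # pair off adjacent teams in the shuffled order
            for a, b in zip(teams[::2], teams[1::2]):
                if scores[a][week] > scores[b][week]:
                    totals[a] += 1
                elif scores[b][week] > scores[a][week]:
                    totals[b] += 1
        for t, w in totals.items():
            wins[t].append(w)
    return wins

sim = simulate_seasons(scores)
for team in sorted(sim):
    avg = sum(sim[team]) / len(sim[team])
    print('%s: average wins %.2f over %d schedules' % (team, avg, len(sim[team])))

Comparing a team's actual win total against its simulated distribution then shows how much of its record is schedule luck.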