如何在 beautifulsoup 中找到列表的 parents
how to find the parents of a list in beautifulsoup
import requests
from bs4 import BeautifulSoup
# Download the HLTV map-stats page, sending a browser-like user agent header.
url = "https://www.hltv.org/stats/matches/mapstatsid/103093/furia-vs-chaos"
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, "html.parser")
#stat_tables = soup.find_all("table", class_="stats-table")

# Maps the sum of the two numbers in an image title (presumably the round
# number) to the raw "x-y" title string.
results = {}
all_results = soup.find_all("div", class_="round-history-half")
for partial_result in all_results:
    half_results = partial_result.find_all("img")
    for result in half_results:
        # Images whose title is empty are skipped.
        if result["title"]:
            rounds_won = result["title"].split("-")
            key = int(rounds_won[0]) + int(rounds_won[1])
            results[key] = result["title"]

# Print the collected titles in ascending key order.
for key in sorted(results):
    print(key, results[key])
这段代码给出了比赛的所有比分。我想找到父元素(parent),比如执行 print(results[4]) 时,能看出是谁赢了。我不确定如何从列表中找到父元素。
结果应该是
<img alt="FURIA" src="https://static.hltv.org/images/team/logo/8297" class="round-history-team" title="FURIA">
或
Furia
使用这个,
# retrieve the parent first.
# NOTE: relies on `soup` and `results` already existing from the earlier script.
divs = soup.find_all("div", class_="round-history-team-row")
for div in divs:
    # Each row's "round-history-team" image carries the team name in its title.
    parent_img_title = div.find("img", class_="round-history-team")["title"]
    print("Title : " + parent_img_title)
    for result in div.find_all("img", class_="round-history-outcome"):
        # Outcome images with an empty title are skipped.
        if result["title"]:
            rounds_won = result["title"].split("-")
            key = int(rounds_won[0]) + int(rounds_won[1])
            print("Key %d" % key)
            results[key] = result["title"]
输出,
Title : FURIA
Key 4
Key 5
...
Title : Chaos
Key 1
Key 2
您可能想要更改数据结构以使其更易于使用:
编辑:更新了新的数据结构
import json
import requests
from bs4 import BeautifulSoup
# Download the HLTV map-stats page, sending a browser-like user agent header.
url = 'https://www.hltv.org/stats/matches/mapstatsid/103093/furia-vs-chaos'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')
#stat_tables = soup.find_all('table', class_='stats-table')

# Maps a 1-based position within a team row to a {team: score} dict.
results = {}

# get the match root element
match = soup.find('div', class_='standard-box round-history-con')

# get all the teams in the match
teams = [
    team['title']
    for team in match.find_all('img', class_='round-history-team')
]

# get the rows in the match visualization
rows = match.find_all('div', class_='round-history-team-row')
for row in rows:
    # get the index and round element for each round
    for idx, rnd in enumerate(row.find_all('img', class_='round-history-outcome')):
        # make sure there is data in the round
        if rnd['title']:
            # split the string
            scores = rnd['title'].split('-')
            # get the team and score, this won't work if they get out of order
            results[idx + 1] = dict(zip(teams, scores))

# Rebuild the dict in ascending key order before dumping it as JSON.
sorted_results = {i: results[i] for i in sorted(results)}
print(json.dumps(sorted_results, indent=4))
输出:
{
"1": {
"FURIA": "0",
"Chaos": "1"
},
"2": {
"FURIA": "0",
"Chaos": "2"
},
"3": {
"FURIA": "0",
"Chaos": "3"
},
"4": {
"FURIA": "1",
"Chaos": "3"
},
"5": {
"FURIA": "2",
"Chaos": "3"
},
"6": {
"FURIA": "3",
"Chaos": "3"
},
"7": {
"FURIA": "4",
"Chaos": "3"
},
"8": {
"FURIA": "5",
"Chaos": "3"
},
"9": {
"FURIA": "5",
"Chaos": "4"
},
"10": {
"FURIA": "5",
"Chaos": "5"
},
"11": {
"FURIA": "5",
"Chaos": "6"
},
"12": {
"FURIA": "5",
"Chaos": "7"
},
"13": {
"FURIA": "6",
"Chaos": "7"
},
"14": {
"FURIA": "7",
"Chaos": "7"
},
"15": {
"FURIA": "8",
"Chaos": "7"
},
"16": {
"FURIA": "9",
"Chaos": "7"
},
"17": {
"FURIA": "10",
"Chaos": "7"
},
"18": {
"FURIA": "10",
"Chaos": "8"
},
"19": {
"FURIA": "10",
"Chaos": "9"
},
"20": {
"FURIA": "10",
"Chaos": "10"
},
"21": {
"FURIA": "11",
"Chaos": "10"
},
"22": {
"FURIA": "12",
"Chaos": "10"
},
"23": {
"FURIA": "13",
"Chaos": "10"
},
"24": {
"FURIA": "14",
"Chaos": "10"
},
"25": {
"FURIA": "15",
"Chaos": "10"
},
"26": {
"FURIA": "15",
"Chaos": "11"
},
"27": {
"FURIA": "16",
"Chaos": "11"
}
}
试试这个...
# NOTE(review): `team_left` and `team_right` are not defined in this snippet;
# presumably they are result lists from earlier soup.find_all(...) calls.
# Print the alt text of the first image of whichever side has a "bold won" div.
if team_left[0].find_all("div", class_="bold won"):
    print(team_left[0].find_all("img")[0].get("alt"))
else:
    print(team_right[0].find_all("img")[0].get("alt"))
import requests
from bs4 import BeautifulSoup
# Download the HLTV map-stats page, sending a browser-like user agent header.
url = "https://www.hltv.org/stats/matches/mapstatsid/103093/furia-vs-chaos"
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, "html.parser")
#stat_tables = soup.find_all("table", class_="stats-table")

# Maps the sum of the two numbers in an image title (presumably the round
# number) to the raw "x-y" title string.
results = {}
all_results = soup.find_all("div", class_="round-history-half")
for partial_result in all_results:
    half_results = partial_result.find_all("img")
    for result in half_results:
        # Images whose title is empty are skipped.
        if result["title"]:
            rounds_won = result["title"].split("-")
            key = int(rounds_won[0]) + int(rounds_won[1])
            results[key] = result["title"]

# Print the collected titles in ascending key order.
for key in sorted(results):
    print(key, results[key])
这段代码给出了比赛的所有比分。我想找到父元素(parent),比如执行 print(results[4]) 时,能看出是谁赢了。我不确定如何从列表中找到父元素。
结果应该是
<img alt="FURIA" src="https://static.hltv.org/images/team/logo/8297" class="round-history-team" title="FURIA">
或
Furia
使用这个,
# retrieve the parent first.
# NOTE: relies on `soup` and `results` already existing from the earlier script.
divs = soup.find_all("div", class_="round-history-team-row")
for div in divs:
    # Each row's "round-history-team" image carries the team name in its title.
    parent_img_title = div.find("img", class_="round-history-team")["title"]
    print("Title : " + parent_img_title)
    for result in div.find_all("img", class_="round-history-outcome"):
        # Outcome images with an empty title are skipped.
        if result["title"]:
            rounds_won = result["title"].split("-")
            key = int(rounds_won[0]) + int(rounds_won[1])
            print("Key %d" % key)
            results[key] = result["title"]
输出,
Title : FURIA
Key 4
Key 5
...
Title : Chaos
Key 1
Key 2
您可能想要更改数据结构以使其更易于使用: 编辑:更新了新的数据结构
import json
import requests
from bs4 import BeautifulSoup
# Download the HLTV map-stats page, sending a browser-like user agent header.
url = 'https://www.hltv.org/stats/matches/mapstatsid/103093/furia-vs-chaos'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')
#stat_tables = soup.find_all('table', class_='stats-table')

# Maps a 1-based position within a team row to a {team: score} dict.
results = {}

# get the match root element
match = soup.find('div', class_='standard-box round-history-con')

# get all the teams in the match
teams = [
    team['title']
    for team in match.find_all('img', class_='round-history-team')
]

# get the rows in the match visualization
rows = match.find_all('div', class_='round-history-team-row')
for row in rows:
    # get the index and round element for each round
    for idx, rnd in enumerate(row.find_all('img', class_='round-history-outcome')):
        # make sure there is data in the round
        if rnd['title']:
            # split the string
            scores = rnd['title'].split('-')
            # get the team and score, this won't work if they get out of order
            results[idx + 1] = dict(zip(teams, scores))

# Rebuild the dict in ascending key order before dumping it as JSON.
sorted_results = {i: results[i] for i in sorted(results)}
print(json.dumps(sorted_results, indent=4))
输出:
{
"1": {
"FURIA": "0",
"Chaos": "1"
},
"2": {
"FURIA": "0",
"Chaos": "2"
},
"3": {
"FURIA": "0",
"Chaos": "3"
},
"4": {
"FURIA": "1",
"Chaos": "3"
},
"5": {
"FURIA": "2",
"Chaos": "3"
},
"6": {
"FURIA": "3",
"Chaos": "3"
},
"7": {
"FURIA": "4",
"Chaos": "3"
},
"8": {
"FURIA": "5",
"Chaos": "3"
},
"9": {
"FURIA": "5",
"Chaos": "4"
},
"10": {
"FURIA": "5",
"Chaos": "5"
},
"11": {
"FURIA": "5",
"Chaos": "6"
},
"12": {
"FURIA": "5",
"Chaos": "7"
},
"13": {
"FURIA": "6",
"Chaos": "7"
},
"14": {
"FURIA": "7",
"Chaos": "7"
},
"15": {
"FURIA": "8",
"Chaos": "7"
},
"16": {
"FURIA": "9",
"Chaos": "7"
},
"17": {
"FURIA": "10",
"Chaos": "7"
},
"18": {
"FURIA": "10",
"Chaos": "8"
},
"19": {
"FURIA": "10",
"Chaos": "9"
},
"20": {
"FURIA": "10",
"Chaos": "10"
},
"21": {
"FURIA": "11",
"Chaos": "10"
},
"22": {
"FURIA": "12",
"Chaos": "10"
},
"23": {
"FURIA": "13",
"Chaos": "10"
},
"24": {
"FURIA": "14",
"Chaos": "10"
},
"25": {
"FURIA": "15",
"Chaos": "10"
},
"26": {
"FURIA": "15",
"Chaos": "11"
},
"27": {
"FURIA": "16",
"Chaos": "11"
}
}
试试这个...
# NOTE(review): `team_left` and `team_right` are not defined in this snippet;
# presumably they are result lists from earlier soup.find_all(...) calls.
# Print the alt text of the first image of whichever side has a "bold won" div.
if team_left[0].find_all("div", class_="bold won"):
    print(team_left[0].find_all("img")[0].get("alt"))
else:
    print(team_right[0].find_all("img")[0].get("alt"))