Python:在 find_elements 的结果中使用 find_element 的问题?

Python find_element in find_elements problem?

大家好:

我想从网上获取篮球比赛数据,包括联赛、日期、时间和比分……

第一层循环可以正确地获取每个联赛的标题:

for league in leagues:

但是第二层for循环

for row in _rows:

我总是得到所有联赛的行,而我只需要当前这个联赛的数据。

我应该怎么做才能解决它?

任何帮助将不胜感激。

from selenium import webdriver
#from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
import time

from selenium.common.exceptions import NoSuchElementException        

# Question script: drive Chrome with Selenium to the basketball schedule page
# and print every result row, grouped by league.
# NOTE(review): this is the code as posted in the question; the two defects
# flagged below (URL and row XPath) are left in place on purpose.
driver = webdriver.Chrome()
driver.set_window_size(1500,1350)

# Open the page. The URL is assembled from fragments only to get past a spam
# filter on the site where this was posted.
# NOTE(review): the assembled URL ends in "Games.aspxdevice=pc" -- there is no
# "?" before the query string, so the request does not hit Games.aspx.
driver.get("https://"+"we"+"b2."+"sa8"+"8"+"88.n"+"et"+"/sp"+"ort/Ga"+"mes.aspxdevice=pc")

# Switch to the basketball section: wait up to 10 s for the menu link to be
# present in the DOM, then click it.
locator = (By.XPATH, '//*[@id="menuList"]/div/ul/li[3]/div[2]/a[1]') 
pointer = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(locator),
"element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
# Fixed pause after the click before touching the date menu.
time.sleep(1)

# Open the date menu.
locator = (By.XPATH, '//*[@id="chooseDate"]')
pointer = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(locator),
"element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()

# Pick the first entry of the date menu.
locator = (By.XPATH, '//*[@id="dateOption"]/a[1]/span[1]')
pointer = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(locator),
"element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()

# Close the ad: the close button is clicked twice.
locator = (By.ID, 'btn_close') 
pointer = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(locator),
"element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
actions = ActionChains(driver)
actions.click(pointer).perform()

# Collect the tables under #scheduleBottom; the loop below treats each table
# as one league.
# The empty-list assignment is redundant: it is overwritten on the next line.
leagues = []
leagues = driver.find_elements(By.XPATH, '//*[@id="scheduleBottom"]/table[*]')
for league in leagues:
    #print("Block.text=",Block.text,"\n")
    #_rows = Block.find_elements(By.TAG_NAME, "tr")
    # The table's <caption> element is used as the league title.
    league_Title = league.find_element(By.TAG_NAME ,'caption')
    _rows = []
    # NOTE(review): an XPath that starts with "//" is evaluated from the
    # document root even though it is called on `league`, so this returns the
    # rows of every league on each iteration. A leading dot (".//...") makes
    # the search relative to `league`.
    _rows = league.find_elements(By.XPATH, "//*[contains(@id, '_mainRow') or contains(@id, '_secondRow')]")
    print("\nleague : ",league_Title.text, 'len(_rows)=',len(_rows))
    for row in _rows:
        # NOTE(review): this prints the WebElement object itself, not
        # league_Title.text.
        print(league_Title,row.text) #," / _rows=",_rows)
        # first_rows = Block.find_element(By.XPATH, "//*[contains(@id, '_mainRow')]")
        # second_rows = Block.find_element(By.XPATH, "//*[contains(@id, '_secondRow')]")
        print("\trow : ",row.text)
        # One-second pause after every printed row.
        time.sleep(1)

# Keep the browser open for two minutes before shutting it down.
time.sleep(120)
driver.quit()

我认为 find_element() 或 find() 只会返回页面上的一个元素。如果页面上有多个匹配的元素,使用 find_element() 只能得到其中的第一个。find_elements() 或 findAll() 则会返回页面上所有匹配的元素,并以列表(数组)的形式返回数据。希望对你有所帮助。

我无法运行这段代码,因为页面显示 Error 404。

编辑:问题在于您在 Games.aspx?device=pc 中漏掉了 ?,这导致了 404 错误。

您必须在 xpath 的开头加上一个点 . ,才能使用相对于 league 的路径:
_rows = league.find_elements(By.XPATH, ".//...rest...")  # <-- dot before `//`

您使用的是绝对 xpath,所以它会在整个 HTML 中搜索,而不是只在 league 内搜索。


编辑:

在 xpath 开头加上点之后得到的部分结果:

我用lang=3获取英文文本,

我用 a[2] 选择第二个日期 (03 / 06 (Sun)),因为第一个日期 (03 / 05 (Sat)) 是空的(没有比赛)。

url: https://web2.sa8888.net/sport/Games.aspx?lang=3&device=pc

len(leagues): 115

league: NBA len(_rows)= 12
    row: 06:05 Finished Dallas Mavericks 26 25 34 29 114 | Live Update
    row: Sacramento Kings 36 29 27 21 113
    row: 08:05 Finished Charlotte Hornets 31 31 37 24 123 | Live Update
    row: San Antonio Spurs 30 30 37 20 117
    row: 09:05 Finished Miami Heat 22 32 19 26 99 | Live Update
    row: Philadelphia 76ers 14 26 28 14 82
    row: 09:05 Finished Memphis Grizzlies 31 37 29 27 124 | Live Update
    row: Orlando Magic 29 16 29 22 96
    row: 09:05 Finished Minnesota Timberwolves 32 31 46 26 135 | Live Update
    row: Portland Trail Blazers 34 30 37 20 121
    row: 09:35 Finished Los Angeles Lakers 32 30 27 35 124 | Live Update
    row: Golden State Warriors 25 42 27 22 116
---

league: NBA GATORADE LEAGUE len(_rows)= 8
    row: 08:00 Finished Delaware Blue Coats 42 34 37 33 146 | Live Update
    row: Westchester Knicks 28 28 24 31 111
    row: 09:00 Finished Austin Spurs 35 21 23 31 110 | Live Update
    row: Salt Lake City Stars 30 32 21 17 100
    row: 09:00 Finished Wisconsin Herd 26 30 20 38 114 | Live Update
    row: Capital City Go-Go 27 31 32 38 128
    row: 11:00 Finished Santa Cruz Warriors 36 19 17 27 99 | Live Update
    row: Memphis Hustle 26 29 22 30 107
---

league: CHINA PROFESSIONAL BASKETBALL LEAGUE len(_rows)= 12
    row: 11:00 Finished Fujian Sturgeons 37 21 27 32 117 | Live Update
    row: Ningbo Rockets 24 28 34 25 111
    row: 11:00 Finished Sichuan Blue Whales 12 21 27 20 80 | Live Update
    row: Zhejiang Lions 23 27 35 25 110
    row: 15:00 Finished Shenzhen Leopards 23 32 30 33 118 | Live Update
    row: Shandong Hi Speed 29 25 32 29 115
    row: 15:30 Finished Jilin Northeast Tigers 36 39 25 18 118 | Live Update
    row: Shanghai Sharks 15 25 32 36 108
    row: 19:35 Finished Beijing Ducks 24 20 17 22 83 | Live Update
    row: Beijing Royal Fighters 18 18 21 22 79
    row: 20:00 Finished Nanjing Monkey King 23 24 23 25 95 | Live Update
    row: Jiangsu Dragons 18 17 21 24 80
---

完整的工作代码:

我还添加了 WebDriverWait 来等待联赛表格加载,
并用 row.text.replace('\n', ' | ') 把每一行的文本显示在同一行上。

from selenium import webdriver
#from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
import time

from selenium.common.exceptions import NoSuchElementException        

# Scrape basketball schedules and scores with Selenium and print them grouped
# by league.
#
# Flow: open the page, switch to basketball, pick a date, close the ad, wait
# for the league tables, then print every result row of every league.

WAIT_SECONDS = 10  # upper bound for each explicit wait

# Each table under #scheduleBottom is handled as one league.
LEAGUE_TABLES = (By.XPATH, '//*[@id="scheduleBottom"]/table[*]')

# The leading "." keeps the search inside the league element it is called on;
# a path starting with "//" would be evaluated against the whole document.
LEAGUE_ROWS_XPATH = ".//*[contains(@id, '_mainRow') or contains(@id, '_secondRow')]"


def _wait_for(driver, locator, timeout=WAIT_SECONDS):
    """Return the first element matching *locator* once it is present in the DOM.

    Raises selenium's TimeoutException with the message "element not found"
    if nothing matches within *timeout* seconds.
    """
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located(locator),
        "element not found",
    )


def _wait_and_click(driver, locator, clicks=1):
    """Wait for *locator*, click the element *clicks* times, and return it."""
    element = _wait_for(driver, locator)
    for _ in range(clicks):
        ActionChains(driver).click(element).perform()
    return element


driver = webdriver.Chrome()
try:
    driver.set_window_size(1500, 1350)

    # lang=3 selects the English version of the page.
    driver.get("https://web2.sa8888.net/sport/Games.aspx?lang=3&device=pc")

    # Switch to the basketball section, then pause briefly before using the
    # date menu.
    _wait_and_click(driver, (By.XPATH, '//*[@id="menuList"]/div/ul/li[3]/div[2]/a[1]'))
    time.sleep(1)

    # Open the date menu.
    _wait_and_click(driver, (By.XPATH, '//*[@id="chooseDate"]'))

    # Pick the second date entry (a[2]); the first one had no games.
    _wait_and_click(driver, (By.XPATH, '//*[@id="dateOption"]/a[2]/span[1]'))

    # Close the ad: the close button is clicked twice.
    _wait_and_click(driver, (By.ID, 'btn_close'), clicks=2)

    # Wait until at least one league table is present before collecting them.
    _wait_for(driver, LEAGUE_TABLES)

    leagues = driver.find_elements(*LEAGUE_TABLES)
    print('len(leagues):', len(leagues))

    for league in leagues:
        # The table's <caption> element holds the league title.
        league_Title = league.find_element(By.TAG_NAME, 'caption')

        _rows = league.find_elements(By.XPATH, LEAGUE_ROWS_XPATH)
        print("\nleague:", league_Title.text, 'len(_rows)=', len(_rows))

        for row in _rows:
            # Collapse the multi-line cell text so each row prints on one line.
            print("\trow:", row.text.replace('\n', ' | '))
            time.sleep(1)
        print('---')

    # Keep the browser open for two minutes so the page can be inspected.
    time.sleep(120)
finally:
    # Always shut the browser down, even if a wait timed out or a lookup
    # failed; otherwise the Chrome/chromedriver processes are left running.
    driver.quit()