t.xpath 没有从第二页获得结果?
t.xpath not getting results from second page?
我正在尝试遍历网站的多个页面,但是我在下面使用的代码仅返回第一页的结果,即使我使用 Selenium 单击到下一页也是如此。我不知道是什么原因造成的。任何解释将不胜感激!
from selenium import webdriver
import time
import xlsxwriter
from lxml import html
# Search URL; the part after '#' is a URL-encoded JSON object holding the search settings.
u = 'https://www.cruiseplum.com/search#{%22numPax%22:2,%22geo%22:%22US%22,%22portsMatchAll%22:true,%22numOptionsShown%22:100,%22ppdIncludesTaxTips%22:true,%22uiVersion%22:%22split%22,%22sortTableByField%22:%22dd%22,%22sortTableOrderDesc%22:false,%22filter%22:null}'
driver = webdriver.Chrome()
driver.get(u)
driver.maximize_window()
time.sleep(.3)
driver.find_element_by_id('restoreSettingsYesEncl').click() # select 'yes' on the webpage to restore settings
time.sleep(7) # wait until the website downloads data so we get a return value
# Snapshot the DOM once: `t` is a static lxml tree parsed from the markup as it is at this moment.
elem = driver.find_element_by_xpath("//*")
source_code = elem.get_attribute("innerHTML")
t = html.fromstring(source_code)
# NOTE: the indentation of the loop body was lost in this listing.
# `t` is never rebuilt below, so every pass queries the same snapshot taken above,
# even though the browser itself is clicked forward to the next page.
for i in range(5):
# The inner loop reuses the name `i`, shadowing the outer page counter.
for i in t.xpath('.//td[@class="dc-table-column _0"]/text()'):
print(i.strip())
driver.find_element_by_xpath('//*[@id="listings-table-split"]/div[5]/div/span[4]').click() # click to next page
time.sleep(.05)
driver.quit()
在上面的代码中,t 是在循环开始之前赋值的,而且只赋值了一次:
elem = driver.find_element_by_xpath("//*")
source_code = elem.get_attribute("innerHTML")
t = html.fromstring(source_code)
for i in range(5):
因此页面源码只在第一次加载后被解析了一次,之后的每次循环都在重复遍历同一批元素。要解决这个问题,需要把获取并解析页面源码的这几行移到循环内部,这样每次点击"下一页"之后都会重新读取页面,如下面的代码所示:
# Print the text of every <td class="dc-table-column _0"> cell on several
# consecutive result pages of a cruiseplum.com search.
#
# The browser is clicked forward to the next page on each pass, so the page
# markup must be re-read and re-parsed inside the loop; a tree parsed once up
# front would keep yielding the rows of the first page.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import xlsxwriter
from lxml import html

# Search URL; the part after '#' is a URL-encoded JSON object holding the search settings.
u = 'https://www.cruiseplum.com/search#{%22numPax%22:2,%22geo%22:%22US%22,%22portsMatchAll%22:true,%22numOptionsShown%22:100,%22ppdIncludesTaxTips%22:true,%22uiVersion%22:%22split%22,%22sortTableByField%22:%22dd%22,%22sortTableOrderDesc%22:false,%22filter%22:null}'

PAGES_TO_SCRAPE = 5
CELL_TEXT_XPATH = './/td[@class="dc-table-column _0"]/text()'
NEXT_PAGE_XPATH = '//*[@id="listings-table-split"]/div[5]/div/span[4]'

driver = webdriver.Chrome()
try:
    driver.get(u)
    driver.maximize_window()
    time.sleep(.3)
    # find_element(By..., ...) is used instead of the find_element_by_* helpers,
    # which were removed in Selenium 4.3; this form works on Selenium 3 as well.
    driver.find_element(By.ID, 'restoreSettingsYesEncl').click()  # select 'yes' on the webpage to restore settings
    time.sleep(7)  # wait until the website downloads data so we get a return value

    for _ in range(PAGES_TO_SCRAPE):
        # Re-read the live DOM on every pass so the parsed tree reflects the
        # page that is currently displayed, not the one that was loaded first.
        elem = driver.find_element(By.XPATH, "//*")
        source_code = elem.get_attribute("innerHTML")
        t = html.fromstring(source_code)

        for cell_text in t.xpath(CELL_TEXT_XPATH):
            print(cell_text.strip())

        driver.find_element(By.XPATH, NEXT_PAGE_XPATH).click()  # click to next page
        time.sleep(.05)
finally:
    # Always close the browser, even if a lookup or click above raises.
    driver.quit()
我正在尝试遍历网站的多个页面,但是我在下面使用的代码仅返回第一页的结果,即使我使用 Selenium 单击到下一页也是如此。我不知道是什么原因造成的。任何解释将不胜感激!
from selenium import webdriver
import time
import xlsxwriter
from lxml import html
# Search URL; the part after '#' is a URL-encoded JSON object holding the search settings.
u = 'https://www.cruiseplum.com/search#{%22numPax%22:2,%22geo%22:%22US%22,%22portsMatchAll%22:true,%22numOptionsShown%22:100,%22ppdIncludesTaxTips%22:true,%22uiVersion%22:%22split%22,%22sortTableByField%22:%22dd%22,%22sortTableOrderDesc%22:false,%22filter%22:null}'
driver = webdriver.Chrome()
driver.get(u)
driver.maximize_window()
time.sleep(.3)
driver.find_element_by_id('restoreSettingsYesEncl').click() # select 'yes' on the webpage to restore settings
time.sleep(7) # wait until the website downloads data so we get a return value
# Snapshot the DOM once: `t` is a static lxml tree parsed from the markup as it is at this moment.
elem = driver.find_element_by_xpath("//*")
source_code = elem.get_attribute("innerHTML")
t = html.fromstring(source_code)
# NOTE: the indentation of the loop body was lost in this listing.
# `t` is never rebuilt below, so every pass queries the same snapshot taken above,
# even though the browser itself is clicked forward to the next page.
for i in range(5):
# The inner loop reuses the name `i`, shadowing the outer page counter.
for i in t.xpath('.//td[@class="dc-table-column _0"]/text()'):
print(i.strip())
driver.find_element_by_xpath('//*[@id="listings-table-split"]/div[5]/div/span[4]').click() # click to next page
time.sleep(.05)
driver.quit()
在上面的代码中,t 是在循环开始之前赋值的,而且只赋值了一次:
elem = driver.find_element_by_xpath("//*")
source_code = elem.get_attribute("innerHTML")
t = html.fromstring(source_code)
for i in range(5):
因此页面源码只在第一次加载后被解析了一次,之后的每次循环都在重复遍历同一批元素。要解决这个问题,需要把获取并解析页面源码的这几行移到循环内部,这样每次点击"下一页"之后都会重新读取页面,如下面的代码所示:
# Print the text of every <td class="dc-table-column _0"> cell on several
# consecutive result pages of a cruiseplum.com search.
#
# The browser is clicked forward to the next page on each pass, so the page
# markup must be re-read and re-parsed inside the loop; a tree parsed once up
# front would keep yielding the rows of the first page.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import xlsxwriter
from lxml import html

# Search URL; the part after '#' is a URL-encoded JSON object holding the search settings.
u = 'https://www.cruiseplum.com/search#{%22numPax%22:2,%22geo%22:%22US%22,%22portsMatchAll%22:true,%22numOptionsShown%22:100,%22ppdIncludesTaxTips%22:true,%22uiVersion%22:%22split%22,%22sortTableByField%22:%22dd%22,%22sortTableOrderDesc%22:false,%22filter%22:null}'

PAGES_TO_SCRAPE = 5
CELL_TEXT_XPATH = './/td[@class="dc-table-column _0"]/text()'
NEXT_PAGE_XPATH = '//*[@id="listings-table-split"]/div[5]/div/span[4]'

driver = webdriver.Chrome()
try:
    driver.get(u)
    driver.maximize_window()
    time.sleep(.3)
    # find_element(By..., ...) is used instead of the find_element_by_* helpers,
    # which were removed in Selenium 4.3; this form works on Selenium 3 as well.
    driver.find_element(By.ID, 'restoreSettingsYesEncl').click()  # select 'yes' on the webpage to restore settings
    time.sleep(7)  # wait until the website downloads data so we get a return value

    for _ in range(PAGES_TO_SCRAPE):
        # Re-read the live DOM on every pass so the parsed tree reflects the
        # page that is currently displayed, not the one that was loaded first.
        elem = driver.find_element(By.XPATH, "//*")
        source_code = elem.get_attribute("innerHTML")
        t = html.fromstring(source_code)

        for cell_text in t.xpath(CELL_TEXT_XPATH):
            print(cell_text.strip())

        driver.find_element(By.XPATH, NEXT_PAGE_XPATH).click()  # click to next page
        time.sleep(.05)
finally:
    # Always close the browser, even if a lookup or click above raises.
    driver.quit()