使用 Python Selenium 下载 csv 文件

Download csv file with Python Selenium

我正在尝试通过 python 和 selenium 以编程方式下载 csv 文件,因为我需要这样做数百次。完成这项工作的手动步骤是:

  1. 转到https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced
  2. 在下拉框中选择 County Name,输入“IOWA”,然后点击 Add
  3. 在下拉框中选择 Doc Number,输入“358407”,然后点击 Add
  4. 点击提交
  5. 选择结果表格(table)的第一行(会在同一选项卡中打开新页面)
  6. 在右侧选中(高亮)CSV 报告
  7. 单击“前往”保存文件。

我已经用程序完成了第 1 步到第 5 步的全部操作,并且我相信第 6 步(下面代码中的 box2…)也能正常工作。但是,当我运行代码中的 submit2 那一行时,似乎没有下载任何内容。我想,对于比我更熟悉 Selenium 的人来说,这个问题应该很容易发现并修复。我也试过:

# Locate the reports list box and queue a double-click on it.
source = driver.find_element(By.ID, 'DTLNavigator_Report2_ReportsListBox')
action = webdriver.ActionChains(driver)
# NOTE(review): no perform() call follows in this snippet; per the Selenium
# ActionChains documentation, queued actions only run once perform() is
# called, so this double-click is presumably never sent -- confirm.
action.double_click(source)

但好像也没用。所以要么是我的代码写错了,要么就是我找不到下载下来的文件。如能提供任何帮助,我将不胜感激。希望我提供的信息已经足够。

下面是我目前的代码:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
# Chrome preferences: set the default download directory and disable the
# "save as" prompt.
options = Options()
options.add_experimental_option("prefs", {"download.default_directory": r"D://Users//User//Downloads","download.prompt_for_download": False, "download.directory_upgrade": True, "safebrowsing.enabled": True})
# NOTE(review): the browser runs headless; the answer further down attributes
# the missing download to headless mode and sets the download path through a
# DevTools command instead of these prefs.
options.headless = True
options.add_argument("--window-size=1920,1200")
DRIVER_PATH = "C://temp/webscraping/chromedriver.exe"
driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
# Open the disclaimer page first, then the advanced search page, and click
# the "#btAgree" button once it is clickable.
driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/Search/Disclaimer.aspx?FromUrl=../search/advancedsearch.aspx?mode=advanced")
# Shared explicit wait with a 60-second timeout.
wait = WebDriverWait(driver,60)
driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced")
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAgree"))).click()
# First criterion: option index 4 with the value 'IOWA' (presumably the
# "County Name" entry from the manual steps -- verify against the page).
box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
box.select_by_index(4)
iE = driver.find_element(By.ID, "txtCrit")
iE.send_keys('IOWA')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()
# Second criterion: option index 3 with the value '358407' (presumably the
# "Doc Number" entry from the manual steps -- verify against the page).
box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
box.select_by_index(3)
iE = driver.find_element(By.ID, "txtCrit")
iE.send_keys('358407')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()                                  
# Submit the search, then click the results table. Both elements are looked
# up immediately, without an explicit wait.
submit = driver.find_element(By.ID, "btSearch").click()
myTable = driver.find_element(By.CLASS_NAME, 'SearchResults')
dataSelect = myTable.click()

# On the page that opens, pick the 'CSVMailingList' entry in the reports list.
box2 = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#DTLNavigator_Report2_ReportsListBox"))))
box2.select_by_value('CSVMailingList')

# Click the report button; this is the step after which no file appears.
submit2 = driver.find_element(By.ID, "ReportListButton").click()

在无头(headless)模式下下载文件的解决方法是:通过 driver.command_executor 注册并发送 Page.setDownloadBehavior 命令,以此指定下载路径。

使用以下代码,我能够在无头模式下将 csv 文件下载到当前目录:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
import os
import time

# Download the CSV mailing-list report for one document from the Wisconsin
# property-info site using headless Chrome.
#
# Headless Chrome is told where to save files through the DevTools command
# "Page.setDownloadBehavior", sent via a custom "send_command" endpoint.
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")

DRIVER_PATH = "C://temp/webscraping/chromedriver.exe"
DOWNLOAD_DIR = os.getcwd()  # the CSV is saved into the current working directory
DOWNLOAD_TIMEOUT = 60  # seconds to wait for the download before giving up

driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)

try:
    # Register the chromium "send_command" endpoint and use it to allow
    # downloads into DOWNLOAD_DIR.
    driver.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
    params = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow','downloadPath': DOWNLOAD_DIR}}
    driver.execute("send_command", params)

    # Open the disclaimer page, then the advanced search page, and accept.
    driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/Search/Disclaimer.aspx?FromUrl=../search/advancedsearch.aspx?mode=advanced")
    wait = WebDriverWait(driver, 60)
    driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced")
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAgree"))).click()

    # Add the two search criteria: (option index, value typed into #txtCrit).
    for criterion_index, criterion_value in ((4, 'IOWA'), (3, '358407')):
        box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
        box.select_by_index(criterion_index)
        driver.find_element(By.ID, "txtCrit").send_keys(criterion_value)
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()

    # Submit the search and open the result. The results table only exists
    # after the search round-trip, so wait for it instead of looking it up
    # immediately.
    wait.until(EC.element_to_be_clickable((By.ID, "btSearch"))).click()
    wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'SearchResults'))).click()

    # Choose the CSV report in the reports list on the detail page.
    box2 = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#DTLNavigator_Report2_ReportsListBox"))))
    box2.select_by_value('CSVMailingList')

    # Snapshot the directory so the new file can be recognised afterwards.
    files_before = set(os.listdir(DOWNLOAD_DIR))
    wait.until(EC.element_to_be_clickable((By.ID, "ReportListButton"))).click()

    # Wait for the download to finish: a new file must appear and Chrome's
    # in-progress files (.crdownload / .tmp) must be gone. Polling replaces a
    # fixed sleep, which is either too short or wasted time.
    deadline = time.time() + DOWNLOAD_TIMEOUT
    while time.time() < deadline:
        new_files = set(os.listdir(DOWNLOAD_DIR)) - files_before
        if new_files and not any(
            name.endswith((".crdownload", ".tmp")) for name in new_files
        ):
            break
        time.sleep(0.5)
    else:
        raise TimeoutError(
            "CSV download did not complete within %d seconds" % DOWNLOAD_TIMEOUT
        )
finally:
    # Always shut the browser down so repeated runs do not leak headless
    # Chrome / chromedriver processes.
    driver.quit()