使用 Python Selenium 下载 csv 文件
Download csv file with Python Selenium
我正在尝试通过 python 和 selenium 以编程方式下载 csv 文件,因为我需要这样做数百次。完成这项工作的手动步骤是:
- 转到https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced
- 在下拉框中选择 County Name，输入“IOWA”，然后点击 Add
- 在下拉框中选择 Doc Number，输入“358407”，然后点击 Add
- 点击提交
- 选择结果表格（table）的第一行（会在同一选项卡中打开新页面）
- 在右侧突出显示 CSV 报告
- 单击“前往”保存文件。
到第 5 步为止，我已经用代码实现了所有操作，并且我相信第 6 步（下面代码中的 box2……）也能正常工作。但是，当我运行代码中的 submit2 这一行时，似乎没有下载任何文件。我想对于比我更熟悉 Selenium 的人来说，这个问题应该很容易发现并解决。我还试过：
# Locate the report list box and double-click it.
source = driver.find_element(By.ID, 'DTLNavigator_Report2_ReportsListBox')
action = webdriver.ActionChains(driver)
# ActionChains only queues actions; nothing is sent to the browser until
# perform() is called, so the chain must end with it.
action.double_click(source).perform()
但好像也没用。所以我要么搞砸了代码,要么就是找不到下载的文件。您可以提供的任何帮助将不胜感激。我希望我已经提供了足够的信息供您阅读。
下面是我目前的代码:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
# Chrome options: set download preferences (target directory, no prompt),
# run headless, and use a fixed window size.
# NOTE(review): the script runs headless; whether these prefs alone are enough
# for the file to be saved in headless mode should be confirmed.
options = Options()
options.add_experimental_option("prefs", {"download.default_directory": r"D://Users//User//Downloads","download.prompt_for_download": False, "download.directory_upgrade": True, "safebrowsing.enabled": True})
options.headless = True
options.add_argument("--window-size=1920,1200")
DRIVER_PATH = "C://temp/webscraping/chromedriver.exe"
driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
# Load the disclaimer page first, then the advanced-search page.
driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/Search/Disclaimer.aspx?FromUrl=../search/advancedsearch.aspx?mode=advanced")
# Shared explicit wait with a 60-second timeout, reused for every wait below.
wait = WebDriverWait(driver,60)
driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced")
# Click the #btAgree button as soon as it is clickable.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAgree"))).click()
# First criterion: choose option index 4 of #sCriteria, type 'IOWA' into
# #txtCrit, then click #btAdd.
box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
box.select_by_index(4)
iE = driver.find_element(By.ID, "txtCrit")
iE.send_keys('IOWA')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()
# Second criterion: choose option index 3 of #sCriteria, type '358407' into
# #txtCrit, then click #btAdd.
box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
box.select_by_index(3)
iE = driver.find_element(By.ID, "txtCrit")
iE.send_keys('358407')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()
# Submit the search, then click the element with class 'SearchResults'.
# NOTE(review): the results element is looked up immediately after the search
# click, without an explicit wait — confirm the results page has loaded by then.
submit = driver.find_element(By.ID, "btSearch").click()
myTable = driver.find_element(By.CLASS_NAME, 'SearchResults')
dataSelect = myTable.click()
# On the detail page, choose the 'CSVMailingList' entry in the report list box
# and click the report button to request the file.
box2 = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#DTLNavigator_Report2_ReportsListBox"))))
box2.select_by_value('CSVMailingList')
submit2 = driver.find_element(By.ID, "ReportListButton").click()
在无头模式下下载文件的解决方法是：通过 driver.command_executor 指定下载路径。
我使用下面的代码，在无头模式下成功把 csv 文件下载到了当前目录：
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
import os
import time
# Headless Chrome configuration with a fixed window size.
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")
DRIVER_PATH = "C://temp/webscraping/chromedriver.exe"
# Directory the CSV is saved to (current working directory in this example).
DOWNLOAD_DIR = os.getcwd()
# Upper bound, in seconds, on how long to wait for the download to finish.
DOWNLOAD_TIMEOUT = 60

driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
try:
    # Register the Chromium "send_command" endpoint on the command executor and
    # use it to allow downloads into DOWNLOAD_DIR while running headless.
    driver.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
    params = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': DOWNLOAD_DIR}}
    driver.execute("send_command", params)

    # Load the disclaimer page, then the advanced-search page, and accept.
    driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/Search/Disclaimer.aspx?FromUrl=../search/advancedsearch.aspx?mode=advanced")
    wait = WebDriverWait(driver, 60)
    driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced")
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAgree"))).click()

    # First criterion: option index 4 of #sCriteria with the value 'IOWA'.
    box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
    box.select_by_index(4)
    iE = driver.find_element(By.ID, "txtCrit")
    iE.send_keys('IOWA')
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()

    # Second criterion: option index 3 of #sCriteria with the value '358407'.
    box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
    box.select_by_index(3)
    iE = driver.find_element(By.ID, "txtCrit")
    iE.send_keys('358407')
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()

    # Submit the search and open the result; wait for the results element
    # instead of assuming it exists the instant the search click returns.
    driver.find_element(By.ID, "btSearch").click()
    wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'SearchResults'))).click()

    # Choose the CSV report on the detail page.
    box2 = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#DTLNavigator_Report2_ReportsListBox"))))
    box2.select_by_value('CSVMailingList')

    # Snapshot the directory so the new file can be recognised afterwards,
    # then trigger the report download.
    files_before = set(os.listdir(DOWNLOAD_DIR))
    driver.find_element(By.ID, "ReportListButton").click()

    # Wait for the csv download to complete: poll until a new file has
    # appeared and no partial-download file (.crdownload / .tmp) remains,
    # rather than sleeping for a fixed time and hoping it was long enough.
    deadline = time.monotonic() + DOWNLOAD_TIMEOUT
    while True:
        new_files = set(os.listdir(DOWNLOAD_DIR)) - files_before
        in_progress = any(name.endswith((".crdownload", ".tmp")) for name in new_files)
        if new_files and not in_progress:
            break
        if time.monotonic() > deadline:
            raise TimeoutError(
                "CSV download did not finish within %d seconds" % DOWNLOAD_TIMEOUT
            )
        time.sleep(0.5)
finally:
    # Always shut the browser and chromedriver down, even when a step fails.
    driver.quit()
我正在尝试通过 python 和 selenium 以编程方式下载 csv 文件,因为我需要这样做数百次。完成这项工作的手动步骤是:
- 转到https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced
- 在下拉框中选择 County Name，输入“IOWA”，然后点击 Add
- 在下拉框中选择 Doc Number，输入“358407”，然后点击 Add
- 点击提交
- 选择结果表格（table）的第一行（会在同一选项卡中打开新页面）
- 在右侧突出显示 CSV 报告
- 单击“前往”保存文件。
到第 5 步为止，我已经用代码实现了所有操作，并且我相信第 6 步（下面代码中的 box2……）也能正常工作。但是，当我运行代码中的 submit2 这一行时，似乎没有下载任何文件。我想对于比我更熟悉 Selenium 的人来说，这个问题应该很容易发现并解决。我还试过：
# Locate the report list box and double-click it.
source = driver.find_element(By.ID, 'DTLNavigator_Report2_ReportsListBox')
action = webdriver.ActionChains(driver)
# ActionChains only queues actions; nothing is sent to the browser until
# perform() is called, so the chain must end with it.
action.double_click(source).perform()
但好像也没用。所以我要么搞砸了代码,要么就是找不到下载的文件。您可以提供的任何帮助将不胜感激。我希望我已经提供了足够的信息供您阅读。
下面是我目前的代码:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
# Chrome options: set download preferences (target directory, no prompt),
# run headless, and use a fixed window size.
# NOTE(review): the script runs headless; whether these prefs alone are enough
# for the file to be saved in headless mode should be confirmed.
options = Options()
options.add_experimental_option("prefs", {"download.default_directory": r"D://Users//User//Downloads","download.prompt_for_download": False, "download.directory_upgrade": True, "safebrowsing.enabled": True})
options.headless = True
options.add_argument("--window-size=1920,1200")
DRIVER_PATH = "C://temp/webscraping/chromedriver.exe"
driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
# Load the disclaimer page first, then the advanced-search page.
driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/Search/Disclaimer.aspx?FromUrl=../search/advancedsearch.aspx?mode=advanced")
# Shared explicit wait with a 60-second timeout, reused for every wait below.
wait = WebDriverWait(driver,60)
driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced")
# Click the #btAgree button as soon as it is clickable.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAgree"))).click()
# First criterion: choose option index 4 of #sCriteria, type 'IOWA' into
# #txtCrit, then click #btAdd.
box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
box.select_by_index(4)
iE = driver.find_element(By.ID, "txtCrit")
iE.send_keys('IOWA')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()
# Second criterion: choose option index 3 of #sCriteria, type '358407' into
# #txtCrit, then click #btAdd.
box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
box.select_by_index(3)
iE = driver.find_element(By.ID, "txtCrit")
iE.send_keys('358407')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()
# Submit the search, then click the element with class 'SearchResults'.
# NOTE(review): the results element is looked up immediately after the search
# click, without an explicit wait — confirm the results page has loaded by then.
submit = driver.find_element(By.ID, "btSearch").click()
myTable = driver.find_element(By.CLASS_NAME, 'SearchResults')
dataSelect = myTable.click()
# On the detail page, choose the 'CSVMailingList' entry in the report list box
# and click the report button to request the file.
box2 = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#DTLNavigator_Report2_ReportsListBox"))))
box2.select_by_value('CSVMailingList')
submit2 = driver.find_element(By.ID, "ReportListButton").click()
在无头模式下下载文件的解决方法是：通过 driver.command_executor 指定下载路径。
我使用下面的代码，在无头模式下成功把 csv 文件下载到了当前目录：
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
import os
import time
# Headless Chrome configuration with a fixed window size.
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")
DRIVER_PATH = "C://temp/webscraping/chromedriver.exe"
# Directory the CSV is saved to (current working directory in this example).
DOWNLOAD_DIR = os.getcwd()
# Upper bound, in seconds, on how long to wait for the download to finish.
DOWNLOAD_TIMEOUT = 60

driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
try:
    # Register the Chromium "send_command" endpoint on the command executor and
    # use it to allow downloads into DOWNLOAD_DIR while running headless.
    driver.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
    params = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': DOWNLOAD_DIR}}
    driver.execute("send_command", params)

    # Load the disclaimer page, then the advanced-search page, and accept.
    driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/Search/Disclaimer.aspx?FromUrl=../search/advancedsearch.aspx?mode=advanced")
    wait = WebDriverWait(driver, 60)
    driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced")
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAgree"))).click()

    # First criterion: option index 4 of #sCriteria with the value 'IOWA'.
    box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
    box.select_by_index(4)
    iE = driver.find_element(By.ID, "txtCrit")
    iE.send_keys('IOWA')
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()

    # Second criterion: option index 3 of #sCriteria with the value '358407'.
    box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
    box.select_by_index(3)
    iE = driver.find_element(By.ID, "txtCrit")
    iE.send_keys('358407')
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()

    # Submit the search and open the result; wait for the results element
    # instead of assuming it exists the instant the search click returns.
    driver.find_element(By.ID, "btSearch").click()
    wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'SearchResults'))).click()

    # Choose the CSV report on the detail page.
    box2 = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#DTLNavigator_Report2_ReportsListBox"))))
    box2.select_by_value('CSVMailingList')

    # Snapshot the directory so the new file can be recognised afterwards,
    # then trigger the report download.
    files_before = set(os.listdir(DOWNLOAD_DIR))
    driver.find_element(By.ID, "ReportListButton").click()

    # Wait for the csv download to complete: poll until a new file has
    # appeared and no partial-download file (.crdownload / .tmp) remains,
    # rather than sleeping for a fixed time and hoping it was long enough.
    deadline = time.monotonic() + DOWNLOAD_TIMEOUT
    while True:
        new_files = set(os.listdir(DOWNLOAD_DIR)) - files_before
        in_progress = any(name.endswith((".crdownload", ".tmp")) for name in new_files)
        if new_files and not in_progress:
            break
        if time.monotonic() > deadline:
            raise TimeoutError(
                "CSV download did not finish within %d seconds" % DOWNLOAD_TIMEOUT
            )
        time.sleep(0.5)
finally:
    # Always shut the browser and chromedriver down, even when a step fails.
    driver.quit()