我怎样才能让这段 selenium 代码并行运行?
How can I make this selenium code run in parallel?
我有两段独立的 selenium 代码,用于抓取网站和下载文件。我试图将它们合并到一个脚本中,让它们同时运行,而不是按顺序运行。有人可以写一份合并两者的可用代码,使它们并行运行吗?
这是第一个代码:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Script 1: search ons.gov.uk for "Education and childcare" and follow the
# result links down to the final download link.

# Run Chrome headless with a 1920x1080 window.
options = Options()
options.add_argument("--headless")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# Allow downloads through the DevTools protocol and save them into the
# current working directory.
params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
driver.execute_cdp_cmd('Page.setDownloadBehavior', params)

driver.get("https://www.ons.gov.uk/")

# One shared explicit wait (up to 20 seconds per lookup).
wait = WebDriverWait(driver, 20)

# Type the search term exactly once; sending it twice would append the text
# to itself in the search box.
wait.until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys("Education and childcare")

# Click through, in order: search submit, then each link on the pages that
# follow. Every click waits for its target to become clickable, because the
# preceding click triggers a page load. ``find_element(By.XPATH, ...)``-style
# locators are used because the ``find_element_by_xpath`` helper no longer
# exists in current Selenium releases.
for xpath in (
    '//*[@id="nav-search-submit"]',
    '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span',
    '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span',
    '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a',
):
    wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()

# NOTE(review): the driver is never quit in this script. Add driver.quit()
# only after confirming the download has finished, otherwise the file may be
# truncated.
这是第二个代码:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Script 2: search data.gov.uk for "Forestry Statistics 2018: Recreation" and
# follow the result links down to the final download link.

options = Options()
# Headless mode and the fixed window size are disabled in this script;
# uncomment the two lines below to enable them.
#options.add_argument("--headless")
#options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# Allow downloads through the DevTools protocol and save them into the
# current working directory.
params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
driver.execute_cdp_cmd('Page.setDownloadBehavior', params)

driver.get("https://data.gov.uk/")

# One shared explicit wait (up to 20 seconds per lookup).
wait = WebDriverWait(driver, 20)

# Fill in the search box once it is interactable.
wait.until(
    EC.element_to_be_clickable((By.XPATH, "/html/body/div[3]/main/div[2]/form/div/div/input"))
).send_keys("Forestry Statistics 2018: Recreation")

# Click through, in order: search button, then each link on the pages that
# follow. Every click waits for its target to become clickable, because the
# preceding click triggers a page load. ``find_element(By.XPATH, ...)``-style
# locators are used because the ``find_element_by_xpath`` helper no longer
# exists in current Selenium releases.
for xpath in (
    '/html/body/div[3]/main/div[2]/form/div/div/div/button',
    '/html/body/div[3]/form/main/div/div[2]/div[2]/div[2]/h2/a',
    '/html/body/div[3]/main/div/div/div/section/table/tbody/tr[2]/td[1]/a',
):
    wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()

# NOTE(review): the driver is never quit in this script. Add driver.quit()
# only after confirming the download has finished, otherwise the file may be
# truncated.
最简单的方法是创建一个大小为 2 的线程池(您不需要多进程池,因为每个 Chrome 驱动程序已经在其自己的进程中运行):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from multiprocessing.pool import ThreadPool
from functools import partial
def getDriver(headless=True):
    """Create and return a new Chrome WebDriver.

    Args:
        headless: When true (the default), Chrome is started with
            ``--headless``. Pass ``False`` to get a visible browser window.

    Returns:
        A new ``webdriver.Chrome`` instance started with a 1920x1080 window
        size. This function does not close the driver; the caller has to
        call ``quit()`` on it.
    """
    options = Options()
    if headless:
        options.add_argument("--headless")
    options.add_argument("--window-size=1920,1080")
    return webdriver.Chrome(options=options)
def task1():
    """Search ons.gov.uk for "Education and childcare" and click the download link.

    Opens a fresh driver from ``getDriver()``, enables downloads into the
    current working directory, runs the search, and clicks through the
    result pages. The driver is always quit, even when a step raises.

    Raises:
        Whatever Selenium raises when an element does not become clickable
        within 20 seconds or a WebDriver command fails.
    """
    driver = getDriver()
    try:
        # Allow downloads through the DevTools protocol and save them into
        # the current working directory.
        params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
        driver.execute_cdp_cmd('Page.setDownloadBehavior', params)
        driver.get("https://www.ons.gov.uk/")

        wait = WebDriverWait(driver, 20)

        # Type the search term exactly once; sending it twice would append
        # the text to itself in the search box.
        wait.until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys("Education and childcare")

        # Each click loads a new page, so wait for the next target to become
        # clickable instead of looking it up immediately. By.XPATH locators
        # replace the ``find_element_by_xpath`` helper, which no longer
        # exists in current Selenium releases.
        for xpath in (
            '//*[@id="nav-search-submit"]',
            '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span',
            '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span',
            '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a',
        ):
            wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
        # NOTE(review): quit() below runs right after the final click;
        # confirm the download has completed before the browser is closed.
    finally:
        driver.quit()
def task2():
    """Search data.gov.uk for "Forestry Statistics 2018: Recreation" and click the download link.

    Opens a fresh driver from ``getDriver()``, enables downloads into the
    current working directory, runs the search, and clicks through the
    result pages. The driver is always quit, even when a step raises.

    Raises:
        Whatever Selenium raises when an element does not become clickable
        within 20 seconds or a WebDriver command fails.
    """
    driver = getDriver()
    try:
        # Allow downloads through the DevTools protocol and save them into
        # the current working directory.
        params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
        driver.execute_cdp_cmd('Page.setDownloadBehavior', params)
        driver.get("https://data.gov.uk/")

        wait = WebDriverWait(driver, 20)

        # Fill in the search box once it is interactable.
        wait.until(
            EC.element_to_be_clickable((By.XPATH, "/html/body/div[3]/main/div[2]/form/div/div/input"))
        ).send_keys("Forestry Statistics 2018: Recreation")

        # Each click loads a new page, so wait for the next target to become
        # clickable instead of looking it up immediately. By.XPATH locators
        # replace the ``find_element_by_xpath`` helper, which no longer
        # exists in current Selenium releases.
        for xpath in (
            '/html/body/div[3]/main/div[2]/form/div/div/div/button',
            '/html/body/div[3]/form/main/div/div[2]/div[2]/div[2]/h2/a',
            '/html/body/div[3]/main/div/div/div/section/table/tbody/tr[2]/td[1]/a',
        ):
            wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
        # NOTE(review): quit() below runs right after the final click;
        # confirm the download has completed before the browser is closed.
    finally:
        driver.quit()
def error_callback(task_name, e):
    """Print a one-line report that a task ended with an exception.

    Args:
        task_name: Label identifying the task in the printed message.
        e: The exception the task raised; it is formatted with ``str()``.
    """
    print(f'{task_name} completed with exception {e}')
# Two worker threads are enough here: one per browser task.
POOL_SIZE = 2
pool = ThreadPool(POOL_SIZE)

# Submit both tasks; a failure is reported through error_callback with the
# task's own name ('task1' / 'task2') bound as the first argument.
for _task in (task1, task2):
    pool.apply_async(_task, error_callback=partial(error_callback, _task.__name__))

# Stop accepting work, then block until both tasks have finished.
pool.close()
pool.join()
我有两段独立的 selenium 代码,用于抓取网站和下载文件。我试图将它们合并到一个脚本中,让它们同时运行,而不是按顺序运行。有人可以写一份合并两者的可用代码,使它们并行运行吗?
这是第一个代码:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Script 1: search ons.gov.uk for "Education and childcare" and follow the
# result links down to the final download link.

# Run Chrome headless with a 1920x1080 window.
options = Options()
options.add_argument("--headless")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# Allow downloads through the DevTools protocol and save them into the
# current working directory.
params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
driver.execute_cdp_cmd('Page.setDownloadBehavior', params)

driver.get("https://www.ons.gov.uk/")

# One shared explicit wait (up to 20 seconds per lookup).
wait = WebDriverWait(driver, 20)

# Type the search term exactly once; sending it twice would append the text
# to itself in the search box.
wait.until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys("Education and childcare")

# Click through, in order: search submit, then each link on the pages that
# follow. Every click waits for its target to become clickable, because the
# preceding click triggers a page load. ``find_element(By.XPATH, ...)``-style
# locators are used because the ``find_element_by_xpath`` helper no longer
# exists in current Selenium releases.
for xpath in (
    '//*[@id="nav-search-submit"]',
    '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span',
    '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span',
    '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a',
):
    wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()

# NOTE(review): the driver is never quit in this script. Add driver.quit()
# only after confirming the download has finished, otherwise the file may be
# truncated.
这是第二个代码:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Script 2: search data.gov.uk for "Forestry Statistics 2018: Recreation" and
# follow the result links down to the final download link.

options = Options()
# Headless mode and the fixed window size are disabled in this script;
# uncomment the two lines below to enable them.
#options.add_argument("--headless")
#options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# Allow downloads through the DevTools protocol and save them into the
# current working directory.
params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
driver.execute_cdp_cmd('Page.setDownloadBehavior', params)

driver.get("https://data.gov.uk/")

# One shared explicit wait (up to 20 seconds per lookup).
wait = WebDriverWait(driver, 20)

# Fill in the search box once it is interactable.
wait.until(
    EC.element_to_be_clickable((By.XPATH, "/html/body/div[3]/main/div[2]/form/div/div/input"))
).send_keys("Forestry Statistics 2018: Recreation")

# Click through, in order: search button, then each link on the pages that
# follow. Every click waits for its target to become clickable, because the
# preceding click triggers a page load. ``find_element(By.XPATH, ...)``-style
# locators are used because the ``find_element_by_xpath`` helper no longer
# exists in current Selenium releases.
for xpath in (
    '/html/body/div[3]/main/div[2]/form/div/div/div/button',
    '/html/body/div[3]/form/main/div/div[2]/div[2]/div[2]/h2/a',
    '/html/body/div[3]/main/div/div/div/section/table/tbody/tr[2]/td[1]/a',
):
    wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()

# NOTE(review): the driver is never quit in this script. Add driver.quit()
# only after confirming the download has finished, otherwise the file may be
# truncated.
最简单的方法是创建一个大小为 2 的线程池(您不需要多进程池,因为每个 Chrome 驱动程序已经在其自己的进程中运行):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from multiprocessing.pool import ThreadPool
from functools import partial
def getDriver(headless=True):
    """Create and return a new Chrome WebDriver.

    Args:
        headless: When true (the default), Chrome is started with
            ``--headless``. Pass ``False`` to get a visible browser window.

    Returns:
        A new ``webdriver.Chrome`` instance started with a 1920x1080 window
        size. This function does not close the driver; the caller has to
        call ``quit()`` on it.
    """
    options = Options()
    if headless:
        options.add_argument("--headless")
    options.add_argument("--window-size=1920,1080")
    return webdriver.Chrome(options=options)
def task1():
    """Search ons.gov.uk for "Education and childcare" and click the download link.

    Opens a fresh driver from ``getDriver()``, enables downloads into the
    current working directory, runs the search, and clicks through the
    result pages. The driver is always quit, even when a step raises.

    Raises:
        Whatever Selenium raises when an element does not become clickable
        within 20 seconds or a WebDriver command fails.
    """
    driver = getDriver()
    try:
        # Allow downloads through the DevTools protocol and save them into
        # the current working directory.
        params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
        driver.execute_cdp_cmd('Page.setDownloadBehavior', params)
        driver.get("https://www.ons.gov.uk/")

        wait = WebDriverWait(driver, 20)

        # Type the search term exactly once; sending it twice would append
        # the text to itself in the search box.
        wait.until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys("Education and childcare")

        # Each click loads a new page, so wait for the next target to become
        # clickable instead of looking it up immediately. By.XPATH locators
        # replace the ``find_element_by_xpath`` helper, which no longer
        # exists in current Selenium releases.
        for xpath in (
            '//*[@id="nav-search-submit"]',
            '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span',
            '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span',
            '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a',
        ):
            wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
        # NOTE(review): quit() below runs right after the final click;
        # confirm the download has completed before the browser is closed.
    finally:
        driver.quit()
def task2():
    """Search data.gov.uk for "Forestry Statistics 2018: Recreation" and click the download link.

    Opens a fresh driver from ``getDriver()``, enables downloads into the
    current working directory, runs the search, and clicks through the
    result pages. The driver is always quit, even when a step raises.

    Raises:
        Whatever Selenium raises when an element does not become clickable
        within 20 seconds or a WebDriver command fails.
    """
    driver = getDriver()
    try:
        # Allow downloads through the DevTools protocol and save them into
        # the current working directory.
        params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
        driver.execute_cdp_cmd('Page.setDownloadBehavior', params)
        driver.get("https://data.gov.uk/")

        wait = WebDriverWait(driver, 20)

        # Fill in the search box once it is interactable.
        wait.until(
            EC.element_to_be_clickable((By.XPATH, "/html/body/div[3]/main/div[2]/form/div/div/input"))
        ).send_keys("Forestry Statistics 2018: Recreation")

        # Each click loads a new page, so wait for the next target to become
        # clickable instead of looking it up immediately. By.XPATH locators
        # replace the ``find_element_by_xpath`` helper, which no longer
        # exists in current Selenium releases.
        for xpath in (
            '/html/body/div[3]/main/div[2]/form/div/div/div/button',
            '/html/body/div[3]/form/main/div/div[2]/div[2]/div[2]/h2/a',
            '/html/body/div[3]/main/div/div/div/section/table/tbody/tr[2]/td[1]/a',
        ):
            wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
        # NOTE(review): quit() below runs right after the final click;
        # confirm the download has completed before the browser is closed.
    finally:
        driver.quit()
def error_callback(task_name, e):
    """Print a one-line report that a task ended with an exception.

    Args:
        task_name: Label identifying the task in the printed message.
        e: The exception the task raised; it is formatted with ``str()``.
    """
    print(f'{task_name} completed with exception {e}')
# Two worker threads are enough here: one per browser task.
POOL_SIZE = 2
pool = ThreadPool(POOL_SIZE)

# Submit both tasks; a failure is reported through error_callback with the
# task's own name ('task1' / 'task2') bound as the first argument.
for _task in (task1, task2):
    pool.apply_async(_task, error_callback=partial(error_callback, _task.__name__))

# Stop accepting work, then block until both tasks have finished.
pool.close()
pool.join()