我怎样才能让这段 Selenium 代码并行运行？

Question

我有两段独立的 Selenium 代码，用于抓取网站和下载文件。我想把它们合并到一个脚本中，让它们同时运行，而不是按顺序依次运行。有人能提供一份合并了两者、可以正常工作的代码，使它们并行运行吗？

这是第一段代码：

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Script 1: drive headless Chrome through the ons.gov.uk search flow.

# Run Chrome without a visible window, using a fixed viewport size.
options = Options()
options.add_argument("--headless")
options.add_argument("--window-size=1920,1080")

driver = webdriver.Chrome(options=options)

# Allow downloads and direct them to the current working directory.
params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
driver.execute_cdp_cmd('Page.setDownloadBehavior', params)

driver.get("https://www.ons.gov.uk/")
# Wait (up to 20 s) for the search box, then enter the query exactly once:
# sending the keys a second time would append the same text to the box and
# search for the doubled phrase.
WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.NAME, "q"))
).send_keys("Education and childcare")
# find_element(By.XPATH, ...) is used because the find_element_by_xpath
# shortcut was removed in Selenium 4.3. click() returns None, so its result
# is not stored.
driver.find_element(By.XPATH, '//*[@id="nav-search-submit"]').click()
driver.find_element(By.XPATH, '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span').click()
driver.find_element(By.XPATH, '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span').click()
driver.find_element(By.XPATH, '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a').click()

这是第二段代码：

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Script 2: drive Chrome through the data.gov.uk search flow.

options = Options()
# Headless mode and the fixed window size are disabled for this script.
#options.add_argument("--headless")
#options.add_argument("--window-size=1920,1080")

driver = webdriver.Chrome(options=options)

# Allow downloads and direct them to the current working directory.
params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
driver.execute_cdp_cmd('Page.setDownloadBehavior', params)

driver.get("https://data.gov.uk/")
# Wait (up to 20 s) for the search input before typing the query.
WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.XPATH, "/html/body/div[3]/main/div[2]/form/div/div/input"))
).send_keys("Forestry Statistics 2018: Recreation")
# find_element(By.XPATH, ...) is used because the find_element_by_xpath
# shortcut was removed in Selenium 4.3. click() returns None, so its result
# is not stored.
driver.find_element(By.XPATH, '/html/body/div[3]/main/div[2]/form/div/div/div/button').click()
driver.find_element(By.XPATH, '/html/body/div[3]/form/main/div/div[2]/div[2]/div[2]/h2/a').click()
driver.find_element(By.XPATH, '/html/body/div[3]/main/div/div/div/section/table/tbody/tr[2]/td[1]/a').click()

Answer 1

最简单的方法是创建一个大小为 2 的线程池（您不需要多进程池，因为每个 Chrome 驱动程序已经在各自的进程中运行）：

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

from multiprocessing.pool import ThreadPool
from functools import partial

def getDriver():
    """Create and return a Chrome WebDriver configured for headless use.

    The browser is started with the ``--headless`` flag and a
    1920x1080 window size.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    chrome_options = Options()
    for flag in ("--headless", "--window-size=1920,1080"):
        chrome_options.add_argument(flag)

    return webdriver.Chrome(options=chrome_options)

def task1():
    """Drive a Chrome session through the ons.gov.uk search flow.

    Opens https://www.ons.gov.uk/, types "Education and childcare" into the
    search box (the element named ``q``), submits the search, and then clicks
    three further elements located by XPath. The browser is always shut down
    on exit, whether or not a step raised.

    Raises:
        Whatever Selenium raises when the wait times out or an element cannot
        be located; the exception propagates after the driver has quit.
    """
    driver = getDriver()
    try:
        # Allow downloads and direct them to the current working directory.
        params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
        driver.execute_cdp_cmd('Page.setDownloadBehavior', params)

        driver.get("https://www.ons.gov.uk/")
        # Wait (up to 20 s) for the search box, then enter the query exactly
        # once: sending the keys a second time would append the same text to
        # the box and search for the doubled phrase.
        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.NAME, "q"))
        ).send_keys("Education and childcare")
        # find_element(By.XPATH, ...) is used because the
        # find_element_by_xpath shortcut was removed in Selenium 4.3.
        # click() returns None, so its result is not kept.
        driver.find_element(By.XPATH, '//*[@id="nav-search-submit"]').click()
        driver.find_element(By.XPATH, '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span').click()
        driver.find_element(By.XPATH, '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span').click()
        driver.find_element(By.XPATH, '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a').click()
        # NOTE(review): presumably the last click starts a file download, and
        # quit() below runs immediately afterwards - confirm the file has
        # finished saving before the browser closes.
    finally:
        driver.quit()

def task2():
    """Drive a Chrome session through the data.gov.uk search flow.

    Opens https://data.gov.uk/, types "Forestry Statistics 2018: Recreation"
    into the search input, clicks the search button, and then clicks two
    further elements located by absolute XPath. The browser is always shut
    down on exit, whether or not a step raised.

    Raises:
        Whatever Selenium raises when the wait times out or an element cannot
        be located; the exception propagates after the driver has quit.
    """
    driver = getDriver()
    try:
        # Allow downloads and direct them to the current working directory.
        params = {'behavior': 'allow', 'downloadPath': os.getcwd()}
        driver.execute_cdp_cmd('Page.setDownloadBehavior', params)

        driver.get("https://data.gov.uk/")
        # Wait (up to 20 s) for the search input before typing the query.
        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, "/html/body/div[3]/main/div[2]/form/div/div/input"))
        ).send_keys("Forestry Statistics 2018: Recreation")
        # find_element(By.XPATH, ...) is used because the
        # find_element_by_xpath shortcut was removed in Selenium 4.3.
        # click() returns None, so its result is not kept.
        driver.find_element(By.XPATH, '/html/body/div[3]/main/div[2]/form/div/div/div/button').click()
        driver.find_element(By.XPATH, '/html/body/div[3]/form/main/div/div[2]/div[2]/div[2]/h2/a').click()
        driver.find_element(By.XPATH, '/html/body/div[3]/main/div/div/div/section/table/tbody/tr[2]/td[1]/a').click()
        # NOTE(review): presumably the last click starts a file download, and
        # quit() below runs immediately afterwards - confirm the file has
        # finished saving before the browser closes.
    finally:
        driver.quit()

def error_callback(task_name, e):
    """Print a one-line report that a task ended with an exception.

    Args:
        task_name: Label identifying the task in the printed message.
        e: The exception (or any object) to include in the message.
    """
    report = f'{task_name} completed with exception {e}'
    print(report)

# Two tasks are submitted below, so two worker threads are enough.
POOL_SIZE = 2
pool = ThreadPool(processes=POOL_SIZE)
# Submit both tasks without blocking; a task that raises is reported through
# error_callback together with its name.
pool.apply_async(func=task1, error_callback=lambda exc: error_callback('task1', exc))
pool.apply_async(func=task2, error_callback=lambda exc: error_callback('task2', exc))
# Stop accepting new work, then block until both tasks have finished.
pool.close()
pool.join()

我怎样才能让这段 Selenium 代码并行运行？

How can I make this selenium code run in parallel?

python

multithreading

multiprocessing

python-3.x

我怎样才能让这段 Selenium 代码并行运行？

How can I make this selenium code run in parallel?

python

multithreading

multiprocessing

python-3.x

我怎样才能让这段 Selenium 代码并行运行？