可以使用 scrapy 重新创建此 selenium 代码吗?
Could this selenium code be recreated using scrapy?
我想更好地了解 scrapy 的功能。下面是一段非常简单的 selenium 代码:它与网站交互,填写一些输入框,点击一些元素并下载文件。可以用 scrapy 重写这段代码,并让它完成完全相同的操作吗?
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Drive a Chrome session through the ONS site: type a search query, submit
# it, then follow three links in sequence.
# NOTE(review): the question text says the final click downloads a file;
# that cannot be confirmed from the XPath alone.
options = Options()
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)
driver.get("https://www.ons.gov.uk/")

# One shared explicit wait (20 s timeout) is reused for every step.
wait = WebDriverWait(driver, 20)
wait.until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys("Education and childcare")

# The find_element_by_* helpers were removed in Selenium 4, so locate with
# By.XPATH instead. Each click navigates to a new page, so wait for the next
# target to become clickable rather than looking it up immediately.
click_targets = (
    '//*[@id="nav-search-submit"]',
    '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span',
    '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span',
    '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a',
)
for xpath in click_targets:
    wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
"selenium code be recreated using scrapy"
也可以借助 scrapy-selenium 的 SeleniumRequest 正常实现,而且比单独使用 selenium 快得多。
你需要一个 scrapy 项目。它以无头模式运行,但会在各个步骤保存屏幕截图。
脚本:
import scrapy
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
class TestSpider(scrapy.Spider):
    """Spider that replays a search-and-click flow on the ONS site.

    The start page is requested through ``SeleniumRequest``; the callback
    then drives the Selenium driver found in ``response.meta['driver']``
    directly, saving screenshots along the way.
    """

    name = 'test'

    def start_requests(self):
        """Yield the single browser-backed request for the ONS home page."""
        yield SeleniumRequest(
            url='https://www.ons.gov.uk',
            callback=self.parse,
            wait_time=3,
            screenshot=True,
        )

    def parse(self, response):
        """Search for a topic and follow the result links, with screenshots.

        Args:
            response: Response whose ``meta['driver']`` holds the Selenium
                driver used for the request.
        """
        driver = response.meta['driver']
        # One shared explicit wait (20 s timeout) is reused for every step.
        wait = WebDriverWait(driver, 20)

        driver.save_screenshot('screenshot.png')
        wait.until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys("Education and childcare")
        driver.save_screenshot('screenshot_1.png')

        # The find_element_by_* helpers were removed in Selenium 4, so
        # locate with By.XPATH instead.
        wait.until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="nav-search-submit"]'))
        ).click()
        driver.save_screenshot('screenshot_2.png')

        # Each click navigates to a new page, so wait for the next target to
        # become clickable rather than looking it up immediately.
        follow_up_targets = (
            '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span',
            '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span',
            '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a',
        )
        for xpath in follow_up_targets:
            wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
Screenshot
settings.py 文件:
您必须在 settings.py 文件中添加以下选项
# Selenium driver configuration.
from shutil import which

SELENIUM_DRIVER_NAME = 'chrome'
SELENIUM_DRIVER_ARGUMENTS = ['--headless']
# Resolved from PATH when the settings load; None if chromedriver is absent.
SELENIUM_DRIVER_EXECUTABLE_PATH = which('chromedriver')

# Downloader middleware registration for scrapy-selenium (order value 800).
DOWNLOADER_MIDDLEWARES = {'scrapy_selenium.SeleniumMiddleware': 800}
输出:
'downloader/response_status_count/200'
screenshot of the project looks like
screenshot
我想更好地了解 scrapy 的功能。下面是一段非常简单的 selenium 代码:它与网站交互,填写一些输入框,点击一些元素并下载文件。可以用 scrapy 重写这段代码,并让它完成完全相同的操作吗?
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Drive a Chrome session through the ONS site: type a search query, submit
# it, then follow three links in sequence.
# NOTE(review): the question text says the final click downloads a file;
# that cannot be confirmed from the XPath alone.
options = Options()
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)
driver.get("https://www.ons.gov.uk/")

# One shared explicit wait (20 s timeout) is reused for every step.
wait = WebDriverWait(driver, 20)
wait.until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys("Education and childcare")

# The find_element_by_* helpers were removed in Selenium 4, so locate with
# By.XPATH instead. Each click navigates to a new page, so wait for the next
# target to become clickable rather than looking it up immediately.
click_targets = (
    '//*[@id="nav-search-submit"]',
    '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span',
    '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span',
    '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a',
)
for xpath in click_targets:
    wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
"selenium code be recreated using scrapy"
也可以借助 scrapy-selenium 的 SeleniumRequest 正常实现,而且比单独使用 selenium 快得多。
你需要一个 scrapy 项目。它以无头模式运行,但会在各个步骤保存屏幕截图。
脚本:
import scrapy
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
class TestSpider(scrapy.Spider):
    """Spider that replays a search-and-click flow on the ONS site.

    The start page is requested through ``SeleniumRequest``; the callback
    then drives the Selenium driver found in ``response.meta['driver']``
    directly, saving screenshots along the way.
    """

    name = 'test'

    def start_requests(self):
        """Yield the single browser-backed request for the ONS home page."""
        yield SeleniumRequest(
            url='https://www.ons.gov.uk',
            callback=self.parse,
            wait_time=3,
            screenshot=True,
        )

    def parse(self, response):
        """Search for a topic and follow the result links, with screenshots.

        Args:
            response: Response whose ``meta['driver']`` holds the Selenium
                driver used for the request.
        """
        driver = response.meta['driver']
        # One shared explicit wait (20 s timeout) is reused for every step.
        wait = WebDriverWait(driver, 20)

        driver.save_screenshot('screenshot.png')
        wait.until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys("Education and childcare")
        driver.save_screenshot('screenshot_1.png')

        # The find_element_by_* helpers were removed in Selenium 4, so
        # locate with By.XPATH instead.
        wait.until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="nav-search-submit"]'))
        ).click()
        driver.save_screenshot('screenshot_2.png')

        # Each click navigates to a new page, so wait for the next target to
        # become clickable rather than looking it up immediately.
        follow_up_targets = (
            '//*[@id="results"]/div[1]/div[2]/div[1]/h3/a/span',
            '//*[@id="main"]/div[2]/div[1]/section/div/div[1]/div/div[2]/h3/a/span',
            '//*[@id="main"]/div[2]/div/div[1]/div[2]/p[2]/a',
        )
        for xpath in follow_up_targets:
            wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
Screenshot
settings.py 文件:
您必须在 settings.py 文件中添加以下选项
# Selenium driver configuration.
from shutil import which

SELENIUM_DRIVER_NAME = 'chrome'
SELENIUM_DRIVER_ARGUMENTS = ['--headless']
# Resolved from PATH when the settings load; None if chromedriver is absent.
SELENIUM_DRIVER_EXECUTABLE_PATH = which('chromedriver')

# Downloader middleware registration for scrapy-selenium (order value 800).
DOWNLOADER_MIDDLEWARES = {'scrapy_selenium.SeleniumMiddleware': 800}
输出:
'downloader/response_status_count/200'
screenshot of the project looks like
screenshot