从 mcqs 抓取数据
Scrape data from mcqs
我想从该页面抓取 MCQ(选择题)数据,但代码运行时会报错。
另外,我还想知道如何翻到下一页(next page),并继续抓取后续所有页面的 MCQ 数据。
如果有可行的解决方案,请告诉我。
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Chrome launch configuration used by the scraper in this script.
options = webdriver.ChromeOptions()
# options.add_argument("--headless")  # uncomment to run without a visible window
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome's --window-size switch expects "width,height"; the "1920x1080"
# spelling is not parsed, so the requested size was never applied.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# Start Chrome using the chromedriver path returned by webdriver_manager.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
def supplyvan_scraper():
    """Open the banking quiz page and print the question and its four options.

    Uses the module-level ``chrome_driver`` as a context manager, so the
    browser session is closed when the function returns.
    """
    with chrome_driver as driver:
        driver.implicitly_wait(15)
        url = 'http://www.tulsithakur.com/bankingquiztwo.php'
        driver.get(url)
        time.sleep(3)

        # Selenium 4 removed find_element_by_xpath(); use
        # find_element(By.XPATH, ...). The locator must select the <span>
        # element itself: an XPath ending in //text() selects a text node,
        # which WebDriver rejects as a locator. Read the content via .text.
        # NOTE(review): the spans may be empty until a quiz set is chosen or
        # the "next" button is clicked -- confirm against the live page.
        title = driver.find_element(By.XPATH, "//span[@id='quest']").text
        option_1 = driver.find_element(By.XPATH, "//span[@id='onee']").text
        option_2 = driver.find_element(By.XPATH, "//span[@id='two']").text
        option_3 = driver.find_element(By.XPATH, "//span[@id='three']").text
        option_4 = driver.find_element(By.XPATH, "//span[@id='four']").text
        print(title, option_1, option_2, option_3, option_4)


supplyvan_scraper()
页面初次加载时,问题和选项所在的元素中并没有文本。只要点击“下一步”(next)按钮,页面就会加载数据,但此时每个字段(问题、答案)都显示为 undefined。
你可以这样验证:
# Advance the quiz once, then read the question and its four options.
driver.find_element(By.XPATH, '//*[@id="next"]').click()

# ids of the <span> elements, in the order: question, options 1-4.
span_ids = ("quest", "onee", "two", "three", "four")
title, option_1, option_2, option_3, option_4 = (
    driver.find_element(By.XPATH, f"//span[@id='{span_id}']").text
    for span_id in span_ids
)
print(title, option_1, option_2, option_3, option_4)
如果你想通过点击下一步按钮从所有页面抓取数据,你可以试试这个 -
# ids of the <span> elements, in the order: question, options 1-4.
span_ids = ("quest", "onee", "two", "three", "four")

# Keep clicking "next" and printing each question; the loop only ends when
# a Selenium call raises, at which point the error is printed.
try:
    while True:
        driver.find_element(By.XPATH, '//*[@id="next"]').click()
        texts = [
            driver.find_element(By.XPATH, f"//span[@id='{span_id}']").text
            for span_id in span_ids
        ]
        print(*texts)
except Exception as err:
    print(err)
如果先单击左侧边栏(left-sidebar,即 Available Quiz Sets)中的选项,undefined 的问题就会消失。
所以,理想的步骤是:
- 点击题集选项(set option,位于左侧边栏)
- 抓取问题(qs),然后单击“下一步”(next)按钮
题集选项按钮的定位方式:
# XPath of the <input> inside form[1] of the sidebar list.
# NOTE(review): presumably the form index selects the quiz set -- confirm.
set_option_xpath = '//*[@id="features-wrapper"]/div[1]/div/div[1]/section/div/ul/form[1]/div/li/input'
driver.find_element(By.XPATH, set_option_xpath)
每个选项对应的 form 索引(即上面 XPath 中的 form[1])各不相同。该页面共有 70 个选项,因此可以遍历每个选项并逐一抓取数据。
我想从该页面抓取 MCQ(选择题)数据,但代码运行时会报错。
另外,我还想知道如何翻到下一页(next page),并继续抓取后续所有页面的 MCQ 数据。
如果有可行的解决方案,请告诉我。
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Chrome launch configuration used by the scraper in this script.
options = webdriver.ChromeOptions()
# options.add_argument("--headless")  # uncomment to run without a visible window
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome's --window-size switch expects "width,height"; the "1920x1080"
# spelling is not parsed, so the requested size was never applied.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# Start Chrome using the chromedriver path returned by webdriver_manager.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
def supplyvan_scraper():
    """Open the banking quiz page and print the question and its four options.

    Uses the module-level ``chrome_driver`` as a context manager, so the
    browser session is closed when the function returns.
    """
    with chrome_driver as driver:
        driver.implicitly_wait(15)
        url = 'http://www.tulsithakur.com/bankingquiztwo.php'
        driver.get(url)
        time.sleep(3)

        # Selenium 4 removed find_element_by_xpath(); use
        # find_element(By.XPATH, ...). The locator must select the <span>
        # element itself: an XPath ending in //text() selects a text node,
        # which WebDriver rejects as a locator. Read the content via .text.
        # NOTE(review): the spans may be empty until a quiz set is chosen or
        # the "next" button is clicked -- confirm against the live page.
        title = driver.find_element(By.XPATH, "//span[@id='quest']").text
        option_1 = driver.find_element(By.XPATH, "//span[@id='onee']").text
        option_2 = driver.find_element(By.XPATH, "//span[@id='two']").text
        option_3 = driver.find_element(By.XPATH, "//span[@id='three']").text
        option_4 = driver.find_element(By.XPATH, "//span[@id='four']").text
        print(title, option_1, option_2, option_3, option_4)


supplyvan_scraper()
页面初次加载时,问题和选项所在的元素中并没有文本。只要点击“下一步”(next)按钮,页面就会加载数据,但此时每个字段(问题、答案)都显示为 undefined。
你可以这样验证:
# Advance the quiz once, then read the question and its four options.
driver.find_element(By.XPATH, '//*[@id="next"]').click()

# ids of the <span> elements, in the order: question, options 1-4.
span_ids = ("quest", "onee", "two", "three", "four")
title, option_1, option_2, option_3, option_4 = (
    driver.find_element(By.XPATH, f"//span[@id='{span_id}']").text
    for span_id in span_ids
)
print(title, option_1, option_2, option_3, option_4)
如果你想通过点击下一步按钮从所有页面抓取数据,你可以试试这个 -
# ids of the <span> elements, in the order: question, options 1-4.
span_ids = ("quest", "onee", "two", "three", "four")

# Keep clicking "next" and printing each question; the loop only ends when
# a Selenium call raises, at which point the error is printed.
try:
    while True:
        driver.find_element(By.XPATH, '//*[@id="next"]').click()
        texts = [
            driver.find_element(By.XPATH, f"//span[@id='{span_id}']").text
            for span_id in span_ids
        ]
        print(*texts)
except Exception as err:
    print(err)
如果先单击左侧边栏(left-sidebar,即 Available Quiz Sets)中的选项,undefined 的问题就会消失。
所以,理想的步骤是:
- 点击题集选项(set option,位于左侧边栏)
- 抓取问题(qs),然后单击“下一步”(next)按钮
题集选项按钮的定位方式:
# XPath of the <input> inside form[1] of the sidebar list.
# NOTE(review): presumably the form index selects the quiz set -- confirm.
set_option_xpath = '//*[@id="features-wrapper"]/div[1]/div/div[1]/section/div/ul/form[1]/div/li/input'
driver.find_element(By.XPATH, set_option_xpath)
每个选项对应的 form 索引(即上面 XPath 中的 form[1])各不相同。该页面共有 70 个选项,因此可以遍历每个选项并逐一抓取数据。