使用 Python 在 Selenium WebDriver 中按 Escape 键不起作用
Pressing Escape does not function in Selenium WebDriver using Python
我正在编写一个页面抓取器,我想每隔 2 秒停止一次页面加载。所以我尝试用 ActionChains(操作链)按下 ESCAPE 键,但始终不起作用:没有报错,但也没有任何效果。下面是我的代码。(我只摘出了发送 ESCAPE 键的那一部分)先谢谢了!
- 我已经上传了完整代码!抱歉,我的代码比较乱(...)请多多包涵!
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from openpyxl import load_workbook
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
import urllib.request
import time
import pandas as pd
# Scrape listing pages: download every item's thumbnail, open each item in its
# own tab, download the description images and record id/title/price/URL in
# the 'Furniture_Table' worksheet.
#
# NOTE(review): the posted snippet had lost its indentation; the nesting below
# is a reconstruction of the evident loop/try structure — confirm against the
# author's original file.

WORKBOOK_PATH = 'C:/Users/yoosimyung/Desktop/Project/selenium/Taobao/Info_Taobao.xlsx'

# CSS selectors for the non-ad items on a listing page.
ITEM_SELECTOR = ".item.J_MouserOnverReq:not(.item-ad)"
THUMB_SELECTOR = ITEM_SELECTOR + " .J_ItemPic.img"
TITLE_LINK_SELECTOR = ITEM_SELECTOR + " .row.row-2.title .J_ClickStat"

# Detail-page layouts:
# (root element id, description element id, image CSS, title CSS, price CSS).
# The first is used when the URL contains 'item.taobao.com', the second otherwise.
TAOBAO_ITEM_LAYOUT = ("bd", "J_DivItemDesc", "img",
                      ".tb-main-title", ".tb-rmb-num")
OTHER_ITEM_LAYOUT = ("content", "description",
                     ".img-ks-lazyload:not(.desc_anchor)",
                     ".tb-detail-hd > h1", ".tm-price")

wb = load_workbook(filename=WORKBOOK_PATH)
sheet = wb['Furniture_Table']

chrome_options = Options()
# Attach to the Chrome instance reachable at this debugger address.
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
chrome_driver = "C:/Users/yoosimyung/Desktop/Project/selenium/chromedriver.exe"
driver = webdriver.Chrome(chrome_driver, options=chrome_options)

# The "max" attribute of the page-number input gives the number of pages.
Max = driver.find_element_by_css_selector("input.J_Input").get_attribute("max")
MaxN = int(Max)
print(Max)


def _scroll_page(start, stop, step, pause=0.0):
    """Scroll to scrollHeight/divisor while the divisor grows from start to stop."""
    divisor = start
    while divisor < stop:
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight/%s);" % divisor)
        divisor += step
        if pause:
            time.sleep(pause)


def _scrape_detail(row, item_id, page_url, layout):
    """Download the description images of the open item page and fill *row*.

    Writes the URL (I), a hyperlink formula (J), the item id (A), the title
    (B) and the price (H).  If an expected element is missing, column B is
    set to "PASS" instead.
    """
    root_id, desc_id, image_css, title_css, price_css = layout
    row = str(row)
    try:
        # Raises NoSuchElementException when the page lacks the expected root.
        driver.find_element_by_id(root_id)
        sheet['I' + row].value = page_url
        sheet['J' + row].value = "=HYPERLINK(I" + row + ")"

        _scroll_page(.02, 8.5, .022, pause=0.005)

        description = driver.find_element_by_id(desc_id)
        images = description.find_elements_by_css_selector(image_css)
        for k, img in enumerate(images):
            target = item_id + "_detail_" + str(k) + ".jpg"
            try:
                urllib.request.urlretrieve(img.get_attribute("src"), target)
            except Exception as exc:
                # Best effort: one broken image must not abort the item.
                print("skipped " + target + ": " + str(exc))

        title = driver.find_element_by_css_selector(title_css).text
        price = driver.find_element_by_css_selector(price_css).text
        sheet['A' + row].value = item_id
        sheet['B' + row].value = title
        print(title)
        # For a "low-high" price range keep the part after the first dash.
        if "-" in price:
            price = price.split("-")[1]
        sheet['H' + row].value = price
        print(price)
        print(page_url)
    except NoSuchElementException:
        sheet['A' + row].value = item_id
        sheet['B' + row].value = "PASS"


p = 2  # next worksheet row to fill (presumably row 1 is a header — confirm)
for i in range(1, MaxN + 1):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(1)
    _scroll_page(.01, 9, .013)

    th_images = driver.find_elements_by_css_selector(THUMB_SELECTOR)
    for count_th, image in enumerate(th_images, start=1):
        urllib.request.urlretrieve(
            image.get_attribute("src"),
            str(i) + "_" + str(count_th) + "_TH.jpg")

    for count_detail in range(1, len(th_images) + 1):
        # Click the title link; if that fails, click the thumbnail instead.
        try:
            driver.find_elements_by_css_selector(
                TITLE_LINK_SELECTOR)[count_detail - 1].click()
        except Exception:
            driver.find_elements_by_css_selector(
                THUMB_SELECTOR)[count_detail - 1].click()

        driver.switch_to.window(driver.window_handles[-1])
        Currenturl = driver.current_url
        time.sleep(1)
        # The class is ActionChains (imported above); the original
        # `webdriver.Actionchains` is not an attribute of selenium.webdriver.
        ActionChains(driver).send_keys(Keys.ESCAPE).perform()

        if 'item.taobao.com' in Currenturl:
            layout = TAOBAO_ITEM_LAYOUT
        else:
            layout = OTHER_ITEM_LAYOUT
        # The row id always uses the detail counter (one branch used count_th).
        _scrape_detail(p, str(i) + "_" + str(count_detail), Currenturl, layout)

        driver.close()
        driver.switch_to.window(driver.window_handles[0])
        p = p + 1
        # Save after every item so the rows collected so far are on disk.
        wb.save(filename=WORKBOOK_PATH)

    # Submit the pager form (".btn.J_Submit") before the next iteration.
    driver.find_element_by_css_selector(".btn.J_Submit").click()
# Locate the page's <body> element and send the ESCAPE key to it.
body = driver.find_element_by_xpath("//body")
body.send_keys(Keys.ESCAPE)
如果您想停止页面加载以及页面上的其他一切活动,请将该按键发送到 body 元素。
我正在编写一个页面抓取器,我想每隔 2 秒停止一次页面加载。所以我尝试用 ActionChains(操作链)按下 ESCAPE 键,但始终不起作用:没有报错,但也没有任何效果。下面是我的代码。(我只摘出了发送 ESCAPE 键的那一部分)先谢谢了!
- 我已经上传了完整代码!抱歉,我的代码比较乱(...)请多多包涵!
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from openpyxl import load_workbook
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
import urllib.request
import time
import pandas as pd
# Scrape listing pages: download every item's thumbnail, open each item in its
# own tab, download the description images and record id/title/price/URL in
# the 'Furniture_Table' worksheet.
#
# NOTE(review): the posted snippet had lost its indentation; the nesting below
# is a reconstruction of the evident loop/try structure — confirm against the
# author's original file.

WORKBOOK_PATH = 'C:/Users/yoosimyung/Desktop/Project/selenium/Taobao/Info_Taobao.xlsx'

# CSS selectors for the non-ad items on a listing page.
ITEM_SELECTOR = ".item.J_MouserOnverReq:not(.item-ad)"
THUMB_SELECTOR = ITEM_SELECTOR + " .J_ItemPic.img"
TITLE_LINK_SELECTOR = ITEM_SELECTOR + " .row.row-2.title .J_ClickStat"

# Detail-page layouts:
# (root element id, description element id, image CSS, title CSS, price CSS).
# The first is used when the URL contains 'item.taobao.com', the second otherwise.
TAOBAO_ITEM_LAYOUT = ("bd", "J_DivItemDesc", "img",
                      ".tb-main-title", ".tb-rmb-num")
OTHER_ITEM_LAYOUT = ("content", "description",
                     ".img-ks-lazyload:not(.desc_anchor)",
                     ".tb-detail-hd > h1", ".tm-price")

wb = load_workbook(filename=WORKBOOK_PATH)
sheet = wb['Furniture_Table']

chrome_options = Options()
# Attach to the Chrome instance reachable at this debugger address.
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
chrome_driver = "C:/Users/yoosimyung/Desktop/Project/selenium/chromedriver.exe"
driver = webdriver.Chrome(chrome_driver, options=chrome_options)

# The "max" attribute of the page-number input gives the number of pages.
Max = driver.find_element_by_css_selector("input.J_Input").get_attribute("max")
MaxN = int(Max)
print(Max)


def _scroll_page(start, stop, step, pause=0.0):
    """Scroll to scrollHeight/divisor while the divisor grows from start to stop."""
    divisor = start
    while divisor < stop:
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight/%s);" % divisor)
        divisor += step
        if pause:
            time.sleep(pause)


def _scrape_detail(row, item_id, page_url, layout):
    """Download the description images of the open item page and fill *row*.

    Writes the URL (I), a hyperlink formula (J), the item id (A), the title
    (B) and the price (H).  If an expected element is missing, column B is
    set to "PASS" instead.
    """
    root_id, desc_id, image_css, title_css, price_css = layout
    row = str(row)
    try:
        # Raises NoSuchElementException when the page lacks the expected root.
        driver.find_element_by_id(root_id)
        sheet['I' + row].value = page_url
        sheet['J' + row].value = "=HYPERLINK(I" + row + ")"

        _scroll_page(.02, 8.5, .022, pause=0.005)

        description = driver.find_element_by_id(desc_id)
        images = description.find_elements_by_css_selector(image_css)
        for k, img in enumerate(images):
            target = item_id + "_detail_" + str(k) + ".jpg"
            try:
                urllib.request.urlretrieve(img.get_attribute("src"), target)
            except Exception as exc:
                # Best effort: one broken image must not abort the item.
                print("skipped " + target + ": " + str(exc))

        title = driver.find_element_by_css_selector(title_css).text
        price = driver.find_element_by_css_selector(price_css).text
        sheet['A' + row].value = item_id
        sheet['B' + row].value = title
        print(title)
        # For a "low-high" price range keep the part after the first dash.
        if "-" in price:
            price = price.split("-")[1]
        sheet['H' + row].value = price
        print(price)
        print(page_url)
    except NoSuchElementException:
        sheet['A' + row].value = item_id
        sheet['B' + row].value = "PASS"


p = 2  # next worksheet row to fill (presumably row 1 is a header — confirm)
for i in range(1, MaxN + 1):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(1)
    _scroll_page(.01, 9, .013)

    th_images = driver.find_elements_by_css_selector(THUMB_SELECTOR)
    for count_th, image in enumerate(th_images, start=1):
        urllib.request.urlretrieve(
            image.get_attribute("src"),
            str(i) + "_" + str(count_th) + "_TH.jpg")

    for count_detail in range(1, len(th_images) + 1):
        # Click the title link; if that fails, click the thumbnail instead.
        try:
            driver.find_elements_by_css_selector(
                TITLE_LINK_SELECTOR)[count_detail - 1].click()
        except Exception:
            driver.find_elements_by_css_selector(
                THUMB_SELECTOR)[count_detail - 1].click()

        driver.switch_to.window(driver.window_handles[-1])
        Currenturl = driver.current_url
        time.sleep(1)
        # The class is ActionChains (imported above); the original
        # `webdriver.Actionchains` is not an attribute of selenium.webdriver.
        ActionChains(driver).send_keys(Keys.ESCAPE).perform()

        if 'item.taobao.com' in Currenturl:
            layout = TAOBAO_ITEM_LAYOUT
        else:
            layout = OTHER_ITEM_LAYOUT
        # The row id always uses the detail counter (one branch used count_th).
        _scrape_detail(p, str(i) + "_" + str(count_detail), Currenturl, layout)

        driver.close()
        driver.switch_to.window(driver.window_handles[0])
        p = p + 1
        # Save after every item so the rows collected so far are on disk.
        wb.save(filename=WORKBOOK_PATH)

    # Submit the pager form (".btn.J_Submit") before the next iteration.
    driver.find_element_by_css_selector(".btn.J_Submit").click()
# Locate the page's <body> element and send the ESCAPE key to it.
body = driver.find_element_by_xpath("//body")
body.send_keys(Keys.ESCAPE)
如果您想停止页面加载以及页面上的其他一切活动,请将该按键发送到 body 元素。