如何使用 python 在 chrome 驱动程序的新选项卡中打开每个 url
How to open each url in new tab in chrome driver using python
我有一个 URL 列表,需要从这些页面中抓取数据。如果为每个 URL 都启动一个新的驱动程序(driver),网站会拒绝连接,所以我决定改为在新选项卡中打开每个 URL(网站允许这种方式)。下面是我正在使用的代码:
from selenium import webdriver
import time
from lxml import html
# Start Chrome on a landing page; each URL from the list file is then
# opened in an additional tab through a JavaScript window.open() call.
driver = webdriver.Chrome()
driver.get('https://www.google.com/')

OPEN_TAB_JS = "window.open('{}');"
NAME_XPATH = '//div[@class="name"]//text()'
TITLE_XPATH = '//div[@class="title"]//text()'

url_file = open('f:\listofurls.txt', 'r')
for raw_line in url_file:
    # Lines read from the file keep their trailing newline; remove it.
    url = raw_line.strip()
    driver.execute_script(OPEN_TAB_JS.format(url))

    # NOTE(review): no switch_to.window() call is made after opening the
    # tab, so page_source is presumably still taken from the window the
    # driver was already focused on -- confirm.
    tree = html.fromstring(driver.page_source)
    name = tree.xpath(NAME_XPATH)
    title = tree.xpath(TITLE_XPATH)
    print(name, title)

    # Pause between URLs.
    time.sleep(3)
但问题是:所有 URL 会一次性全部打开,而不是一个接一个地依次打开。
我认为你必须像下面这样,在循环之前先用 strip() 去除每一行首尾的空白字符:
# Scrape every URL listed in the file, one tab at a time.
driver = webdriver.Chrome()
driver.get('https://www.google.com/')
# Remember the first tab so focus can return to it after each URL.
main_tab = driver.current_window_handle

# Strip *before* the loop. A file object has no .strip() method
# (file.strip() raises AttributeError), so strip each line instead and skip
# blank ones. The with-block closes the file; the raw string denotes the
# same path without the invalid "\l" escape sequence.
with open(r'f:\listofurls.txt', 'r') as file:
    urls = [line.strip() for line in file if line.strip()]

for url in urls:
    # window.open() creates the tab but does not move WebDriver's focus, so
    # find the new handle and switch to it explicitly.
    handles_before = set(driver.window_handles)
    driver.execute_script("window.open('');")
    new_tab = next(h for h in driver.window_handles if h not in handles_before)
    driver.switch_to.window(new_tab)

    # driver.get() waits for the page load, so page_source below belongs to
    # this URL rather than to the first tab.
    driver.get(url)

    soup = html.fromstring(driver.page_source)
    name = soup.xpath('//div[@class="name"]//text()')
    title = soup.xpath('//div[@class="title"]//text()')
    print(name, title)
    time.sleep(3)

    # Close the finished tab and go back, so tabs do not pile up.
    driver.close()
    driver.switch_to.window(main_tab)
您可以试试这个代码:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from lxml import html
# Scrape every URL listed in the file, each in its own browser tab.
driver = webdriver.Chrome()
driver.get('https://www.google.com/')
# Remember the first tab so focus can return to it after each URL.
main_tab = driver.current_window_handle

# The with-block closes the file; the raw string denotes the same path
# without the invalid "\l" escape sequence.
with open(r'f:\listofurls.txt', 'r') as file:
    for aa in file:
        # Lines keep their trailing newline; strip it and skip blank lines.
        aa = aa.strip()
        if not aa:
            continue

        # Open tab.
        # Sending Ctrl/Cmd+T to <body> is not a reliable way to open a tab
        # under ChromeDriver, and find_element_by_tag_name() no longer exists
        # in Selenium 4.3+. Opening the tab from JavaScript works on both
        # Selenium 3 and 4 and on every OS.
        handles_before = set(driver.window_handles)
        driver.execute_script("window.open('');")
        new_tab = next(
            h for h in driver.window_handles if h not in handles_before
        )
        # The new tab is not focused automatically; switch to it.
        driver.switch_to.window(new_tab)

        # Load a page
        driver.get(aa)

        # Make the tests...
        soup = html.fromstring(driver.page_source)
        name = soup.xpath('//div[@class="name"]//text()')
        title = soup.xpath('//div[@class="title"]//text()')
        print(name, title)
        time.sleep(3)

        # Close only the tab just scraped, then return to the first tab so
        # the session stays alive for the next URL.
        driver.close()
        driver.switch_to.window(main_tab)

# End the browser session once all URLs are processed.
driver.quit()
我有一个 URL 列表,需要从这些页面中抓取数据。如果为每个 URL 都启动一个新的驱动程序(driver),网站会拒绝连接,所以我决定改为在新选项卡中打开每个 URL(网站允许这种方式)。下面是我正在使用的代码:
from selenium import webdriver
import time
from lxml import html
# Start Chrome on a landing page; each URL from the list file is then
# opened in an additional tab through a JavaScript window.open() call.
driver = webdriver.Chrome()
driver.get('https://www.google.com/')

OPEN_TAB_JS = "window.open('{}');"
NAME_XPATH = '//div[@class="name"]//text()'
TITLE_XPATH = '//div[@class="title"]//text()'

url_file = open('f:\listofurls.txt', 'r')
for raw_line in url_file:
    # Lines read from the file keep their trailing newline; remove it.
    url = raw_line.strip()
    driver.execute_script(OPEN_TAB_JS.format(url))

    # NOTE(review): no switch_to.window() call is made after opening the
    # tab, so page_source is presumably still taken from the window the
    # driver was already focused on -- confirm.
    tree = html.fromstring(driver.page_source)
    name = tree.xpath(NAME_XPATH)
    title = tree.xpath(TITLE_XPATH)
    print(name, title)

    # Pause between URLs.
    time.sleep(3)
但问题是:所有 URL 会一次性全部打开,而不是一个接一个地依次打开。
我认为你必须像下面这样,在循环之前先用 strip() 去除每一行首尾的空白字符:
# Scrape every URL listed in the file, one tab at a time.
driver = webdriver.Chrome()
driver.get('https://www.google.com/')
# Remember the first tab so focus can return to it after each URL.
main_tab = driver.current_window_handle

# Strip *before* the loop. A file object has no .strip() method
# (file.strip() raises AttributeError), so strip each line instead and skip
# blank ones. The with-block closes the file; the raw string denotes the
# same path without the invalid "\l" escape sequence.
with open(r'f:\listofurls.txt', 'r') as file:
    urls = [line.strip() for line in file if line.strip()]

for url in urls:
    # window.open() creates the tab but does not move WebDriver's focus, so
    # find the new handle and switch to it explicitly.
    handles_before = set(driver.window_handles)
    driver.execute_script("window.open('');")
    new_tab = next(h for h in driver.window_handles if h not in handles_before)
    driver.switch_to.window(new_tab)

    # driver.get() waits for the page load, so page_source below belongs to
    # this URL rather than to the first tab.
    driver.get(url)

    soup = html.fromstring(driver.page_source)
    name = soup.xpath('//div[@class="name"]//text()')
    title = soup.xpath('//div[@class="title"]//text()')
    print(name, title)
    time.sleep(3)

    # Close the finished tab and go back, so tabs do not pile up.
    driver.close()
    driver.switch_to.window(main_tab)
您可以试试这个代码:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from lxml import html
# Scrape every URL listed in the file, each in its own browser tab.
driver = webdriver.Chrome()
driver.get('https://www.google.com/')
# Remember the first tab so focus can return to it after each URL.
main_tab = driver.current_window_handle

# The with-block closes the file; the raw string denotes the same path
# without the invalid "\l" escape sequence.
with open(r'f:\listofurls.txt', 'r') as file:
    for aa in file:
        # Lines keep their trailing newline; strip it and skip blank lines.
        aa = aa.strip()
        if not aa:
            continue

        # Open tab.
        # Sending Ctrl/Cmd+T to <body> is not a reliable way to open a tab
        # under ChromeDriver, and find_element_by_tag_name() no longer exists
        # in Selenium 4.3+. Opening the tab from JavaScript works on both
        # Selenium 3 and 4 and on every OS.
        handles_before = set(driver.window_handles)
        driver.execute_script("window.open('');")
        new_tab = next(
            h for h in driver.window_handles if h not in handles_before
        )
        # The new tab is not focused automatically; switch to it.
        driver.switch_to.window(new_tab)

        # Load a page
        driver.get(aa)

        # Make the tests...
        soup = html.fromstring(driver.page_source)
        name = soup.xpath('//div[@class="name"]//text()')
        title = soup.xpath('//div[@class="title"]//text()')
        print(name, title)
        time.sleep(3)

        # Close only the tab just scraped, then return to the first tab so
        # the session stays alive for the next URL.
        driver.close()
        driver.switch_to.window(main_tab)

# End the browser session once all URLs are processed.
driver.quit()