Python 中的 Selenium PhantomJS 自定义 headers
Selenium PhantomJS custom headers in Python
我想将 "custom headers" 添加到 python 中的 Selenium PhantomJS。
这些是我要添加的headers。
headers = { 'Accept':'*/*',
'Accept-Encoding':'gzip, deflate, sdch',
'Accept-Language':'en-US,en;q=0.8',
'Cache-Control':'max-age=0',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
}
这是我正在使用的代码:
from selenium import webdriver
service_args = [
'--proxy=127.0.0.1:9999',
'--proxy-type=socks5',
]
driver = webdriver.PhantomJS(service_args=service_args)
driver.set_window_size(1120, 550)
driver.get("https://duckduckgo.com/")
driver.find_element_by_id('search_form_input_homepage').send_keys("realpython")
driver.find_element_by_id("search_button_homepage").click()
print driver.current_url
driver.quit()
如何修改代码以包含这些自定义 headers?
请帮忙。
以下列方式设置 headers:
from selenium import webdriver
headers = { 'Accept':'*/*',
'Accept-Encoding':'gzip, deflate, sdch',
'Accept-Language':'en-US,en;q=0.8',
'Cache-Control':'max-age=0',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
}
# NOTE(review): enumerate(headers) yields (index, header_name) pairs, so `key`
# is an integer index and `value` is the header name; the header values stored
# in the dict are never used by this loop.
for key, value in enumerate(headers):
capability_key = 'phantomjs.page.customHeaders.{}'.format(key)
webdriver.DesiredCapabilities.PHANTOMJS[capability_key] = value
然后开始使用您的 driver:
service_args = [
'--proxy=127.0.0.1:9999',
'--proxy-type=socks5',
]
driver = webdriver.PhantomJS(service_args=service_args)
# ...............
from selenium import webdriver
headers = { 'Accept':'*/*',
'Accept-Encoding':'gzip, deflate, sdch',
'Accept-Language':'en-US,en;q=0.8',
'Cache-Control':'max-age=0',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36' }
for key in headers:
webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.customHeaders.{}'.format(key)] = headers[key]
Andriy Ivaneyko 的方法对我不起作用(PhantomJS 2.1.1 和 Selenium 2.48.0)。
我写了一个完整的例子来设置所有 headers,window 大小和 Selenium PhantomJS 中的代理:
from selenium import webdriver
def init_phantomjs_driver(*args, **kwargs):
    """Create a PhantomJS driver with custom request headers and user agent.

    Each header is registered as a ``phantomjs.page.customHeaders.<name>``
    capability and the user agent as ``phantomjs.page.settings.userAgent``
    on the shared ``webdriver.DesiredCapabilities.PHANTOMJS`` dict before
    the driver is constructed.

    Args:
        *args: Positional arguments forwarded to ``webdriver.PhantomJS``.
        **kwargs: Keyword arguments forwarded to ``webdriver.PhantomJS``
            (for example ``service_args``).

    Returns:
        The PhantomJS driver, with its window resized to 1400x1000.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0',
        'Connection': 'keep-alive'
    }

    # dict.items() exists on both Python 2 and Python 3, whereas
    # dict.iteritems() is Python 2 only and raises AttributeError on Python 3.
    for key, value in headers.items():
        webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.customHeaders.{}'.format(key)] = value

    # The user agent is configured through page settings rather than customHeaders.
    webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.settings.userAgent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'

    driver = webdriver.PhantomJS(*args, **kwargs)
    driver.set_window_size(1400, 1000)
    return driver
def main():
service_args = [
'--proxy=127.0.0.1:9999',
'--proxy-type=http',
'--ignore-ssl-errors=true'
]
driver = init_phantomjs_driver(service_args=service_args)
driver.get('http://cn.bing.com')
注1:
userAgent
设置在 phantomjs.page.settings.userAgent
而不是 phantomjs.page.customHeaders
注2:
Andriy Ivaneyko 使用 enumerate
构建 DesiredCapabilities.PHANTOMJS
,此时 key 是循环索引(而不是 header 名称),所以数据变成:
{
'browserName': 'phantomjs',
'javascriptEnabled': True,
'phantomjs.page.customHeaders.0': 'Accept-Language',
'phantomjs.page.customHeaders.1': 'Accept-Encoding',
'phantomjs.page.customHeaders.2': 'Accept',
'phantomjs.page.customHeaders.3': 'User-Agent',
'phantomjs.page.customHeaders.4': 'Connection',
'phantomjs.page.customHeaders.5': 'Cache-Control',
'platform': 'ANY',
'version': ''
}
没有任何一个 header 属性被正确设置。
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
"(KHTML, like Gecko) Chrome/15.0.87")
driver = webdriver.PhantomJS(desired_capabilities=dcap)
driver.get("http://www.google.com")
我想将 "custom headers" 添加到 python 中的 Selenium PhantomJS。 这些是我要添加的headers。
headers = { 'Accept':'*/*',
'Accept-Encoding':'gzip, deflate, sdch',
'Accept-Language':'en-US,en;q=0.8',
'Cache-Control':'max-age=0',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
}
这是我正在使用的代码:
from selenium import webdriver
service_args = [
'--proxy=127.0.0.1:9999',
'--proxy-type=socks5',
]
driver = webdriver.PhantomJS(service_args=service_args)
driver.set_window_size(1120, 550)
driver.get("https://duckduckgo.com/")
driver.find_element_by_id('search_form_input_homepage').send_keys("realpython")
driver.find_element_by_id("search_button_homepage").click()
print driver.current_url
driver.quit()
如何修改代码以包含这些自定义 headers?
请帮忙。
以下列方式设置 headers:
from selenium import webdriver
headers = { 'Accept':'*/*',
'Accept-Encoding':'gzip, deflate, sdch',
'Accept-Language':'en-US,en;q=0.8',
'Cache-Control':'max-age=0',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
}
# NOTE(review): enumerate(headers) yields (index, header_name) pairs, so `key`
# is an integer index and `value` is the header name; the header values stored
# in the dict are never used by this loop.
for key, value in enumerate(headers):
capability_key = 'phantomjs.page.customHeaders.{}'.format(key)
webdriver.DesiredCapabilities.PHANTOMJS[capability_key] = value
然后开始使用您的 driver:
service_args = [
'--proxy=127.0.0.1:9999',
'--proxy-type=socks5',
]
driver = webdriver.PhantomJS(service_args=service_args)
# ...............
from selenium import webdriver
headers = { 'Accept':'*/*',
'Accept-Encoding':'gzip, deflate, sdch',
'Accept-Language':'en-US,en;q=0.8',
'Cache-Control':'max-age=0',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36' }
for key in headers:
webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.customHeaders.{}'.format(key)] = headers[key]
Andriy Ivaneyko 的方法对我不起作用(PhantomJS 2.1.1 和 Selenium 2.48.0)。
我写了一个完整的例子来设置所有 headers,window 大小和 Selenium PhantomJS 中的代理:
from selenium import webdriver
def init_phantomjs_driver(*args, **kwargs):
    """Create a PhantomJS driver with custom request headers and user agent.

    Each header is registered as a ``phantomjs.page.customHeaders.<name>``
    capability and the user agent as ``phantomjs.page.settings.userAgent``
    on the shared ``webdriver.DesiredCapabilities.PHANTOMJS`` dict before
    the driver is constructed.

    Args:
        *args: Positional arguments forwarded to ``webdriver.PhantomJS``.
        **kwargs: Keyword arguments forwarded to ``webdriver.PhantomJS``
            (for example ``service_args``).

    Returns:
        The PhantomJS driver, with its window resized to 1400x1000.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0',
        'Connection': 'keep-alive'
    }

    # dict.items() exists on both Python 2 and Python 3, whereas
    # dict.iteritems() is Python 2 only and raises AttributeError on Python 3.
    for key, value in headers.items():
        webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.customHeaders.{}'.format(key)] = value

    # The user agent is configured through page settings rather than customHeaders.
    webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.settings.userAgent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'

    driver = webdriver.PhantomJS(*args, **kwargs)
    driver.set_window_size(1400, 1000)
    return driver
def main():
service_args = [
'--proxy=127.0.0.1:9999',
'--proxy-type=http',
'--ignore-ssl-errors=true'
]
driver = init_phantomjs_driver(service_args=service_args)
driver.get('http://cn.bing.com')
注1:
userAgent
设置在 phantomjs.page.settings.userAgent
而不是 phantomjs.page.customHeaders
注2:
Andriy Ivaneyko 使用 enumerate
构建 DesiredCapabilities.PHANTOMJS
,此时 key 是循环索引(而不是 header 名称),所以数据变成:
{
'browserName': 'phantomjs',
'javascriptEnabled': True,
'phantomjs.page.customHeaders.0': 'Accept-Language',
'phantomjs.page.customHeaders.1': 'Accept-Encoding',
'phantomjs.page.customHeaders.2': 'Accept',
'phantomjs.page.customHeaders.3': 'User-Agent',
'phantomjs.page.customHeaders.4': 'Connection',
'phantomjs.page.customHeaders.5': 'Cache-Control',
'platform': 'ANY',
'version': ''
}
没有任何一个 header 属性被正确设置。
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
"(KHTML, like Gecko) Chrome/15.0.87")
driver = webdriver.PhantomJS(desired_capabilities=dcap)
driver.get("http://www.google.com")