在 Python Selenium 中更改代理

Change proxy in Selenium Python

您好,我有一个用于轮换 firefox 代理的代码,我正在使用它,但它不会更改代理。


import time
import random
from selenium import webdriver
from selenium.webdriver.common.proxy import *
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy

# Script from the question: fetch a proxy list, show the first proxy's
# address, then start Firefox with a manually entered proxy IP and port.
req_proxy = RequestProxy()  # the number of proxies returned may differ on each run
proxies = req_proxy.get_proxy_list()  # build the list of proxy objects

# Show the address of the first proxy so it can be typed in at the prompts below.
change_proxy = proxies[0].get_address()
print("This is your ip and port: ", change_proxy)

# Both values are read as strings; input() never returns an int.
ip = input("Ip: ")
port = input("Port: ")

profile = webdriver.FirefoxProfile()
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.http", ip)
# NOTE(review): `port` is passed as a str here; Firefox's
# network.proxy.http_port preference is presumably an integer, which may be
# why the proxy is not applied -- confirm.
# NOTE(review): only the HTTP proxy preferences are set; HTTPS pages
# presumably also need network.proxy.ssl / network.proxy.ssl_port -- confirm.
profile.set_preference("network.proxy.http_port", port)

# Launch Firefox with the profile configured above.
driver = webdriver.Firefox(firefox_profile=profile)

我把 change_proxy 中显示的 IP 和端口分别输入到 ip 和 port 提示中,驱动程序打开了 IP 检查网站,但我看到的仍然是我原来的 IP,没有任何变化:



首先我要说的是,使用免费代理 IP 地址可能会有很大问题。 这些类型的代理因存在连接问题而臭名昭著,例如与延迟相关的超时。此外,这些站点也可能是间歇性的,这意味着它们随时可能出现故障。有时这些网站会被滥用,因此可能会被屏蔽。

我以前没有使用过 Python 包 http_request_randomizer,我注意到文档非常精简。我查看了包的源代码以了解其结构。


import random
import logging
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy

# Start Firefox behind a randomly chosen HTTPS proxy and load an IP-check page.

# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)

# Obtain a random single proxy from the list of proxy addresses
random_proxy = random.sample(req_proxy.get_proxy_list(), 1)

firefox_options = Options()

# The user agent override is a profile preference, so it is set on the
# FirefoxProfile under Firefox's 'general.useragent.override' key.
profile_options = FirefoxProfile()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
profile_options.set_preference('general.useragent.override', user_agent)

# Work on a copy so the shared DesiredCapabilities.FIREFOX template is not
# mutated when the proxy settings are merged in below.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

# add the random proxy to firefox_capabilities
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = random_proxy[0].get_address()
# Without this call the Proxy object is never handed to the browser.
firefox_proxies.add_to_capabilities(firefox_capabilities)

# The profile and the capabilities must both be passed to the driver,
# otherwise neither the user agent nor the proxy takes effect.
driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           firefox_profile=profile_options, desired_capabilities=firefox_capabilities)

# you can either do this
try:
    # print proxy IP for testing
    print(random_proxy[0].get_address())

    URL = 'http://www.expressvpn.com/what-is-my-ip'
    driver.get(URL)

# Some of the proxies pulled from http_request_randomizer will timeout
# for various reasons, so this exception is used to catch these timeouts
except TimeoutException:
    print("A Page load Timeout Occurred.")
    # The session is unusable after a timeout; close the browser so the
    # geckodriver process is not left running.
    driver.quit()

# or this.  You can also put this in a try/except block and
# increase the timeout as needed.
# driver.set_page_load_timeout(120)
# URL = 'http://www.expressvpn.com/what-is-my-ip'
# driver.get(URL)

这是一张屏幕截图,显示 Firefox 在此会话中正确使用了 IP 地址。

如前所述,免费代理可能存在多种问题。下面的代码展示了如何使用 proxy judge 检查单个代理的状态。

import random
import logging
from time import sleep
from random import randint
from proxy_checking import ProxyChecker
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy

def random_ssl_proxy_address():
    """Return the address of one randomly selected HTTPS proxy.

    A fresh proxy list is requested from RequestProxy on every call, and the
    value returned is whatever ``get_address()`` yields for the chosen entry.
    """
    # The ERROR log level keeps the library's debugging output off the console.
    proxy_source = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)
    candidates = proxy_source.get_proxy_list()

    # Draw exactly one proxy from the candidates.
    (chosen,) = random.sample(candidates, 1)
    return chosen.get_address()

def get_proxy_address():
    """Return a random HTTPS proxy address that passes the proxy check.

    Candidate addresses are drawn with ``random_ssl_proxy_address()`` and
    checked with ``ProxyChecker.check_proxy``. A candidate is accepted when
    the check result has a truthy ``'status'`` entry; otherwise another
    candidate is tried after a short random pause.

    Returns:
        The first proxy address whose check reports a truthy status.
    """
    checker = ProxyChecker()

    # Loop rather than recurse so that a long run of dead proxies cannot hit
    # the recursion limit, and so a validated address is always returned.
    while True:
        proxy_address = random_ssl_proxy_address()
        proxy_judge = checker.check_proxy(proxy_address)

        # A missing 'status' key is treated the same as a failed check.
        if proxy_judge.get('status'):
            return proxy_address

        print('Looking for a valid proxy address.')

        # this sleep timer is helping with some timeout issues
        # that were happening when querying
        sleep(randint(5, 10))


# Fetch a validated proxy address and print it.
random_ssl_proxy = get_proxy_address()
print(f'Valid proxy address: {random_ssl_proxy}')
# output
# Valid proxy address: <ip>:<port>

请注意,我使用的 proxy_checker 包没有任何内置的错误处理,因此您必须自行添加一些错误处理代码来捕获可能出现的异常。