在 Python 的 Selenium 中更改代理

Change proxy in Selenium Python

您好,我有一个用于轮换 firefox 代理的代码,我正在使用它,但它不会更改代理。

这是我的代码:

import time
import random
from selenium import webdriver
from selenium.webdriver.common.proxy import *
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy

# Fetch a proxy list, print the first proxy's address, then start Firefox with
# a manually typed-in HTTP proxy and open an IP-check page.
req_proxy = RequestProxy() # the number of proxies returned may differ on each run
proxies = req_proxy.get_proxy_list() # this will create the proxy list

# Only the first proxy in the list is shown to the user.
change_proxy = proxies[0].get_address()
print("This is your ip and port: ", change_proxy)

# input() returns str, so both `ip` and `port` are strings from here on.
ip = input("Ip: ")
port = input("Port: ")

profile = webdriver.FirefoxProfile()
# presumably 1 selects manual proxy configuration -- confirm against Firefox prefs docs
profile.set_preference("network.proxy.type", 1)
# NOTE(review): only the HTTP proxy prefs are set, while the page loaded below
# is an https:// URL -- presumably network.proxy.ssl / network.proxy.ssl_port
# are needed for that traffic to go through the proxy; confirm.
profile.set_preference("network.proxy.http", ip)
# NOTE(review): `port` is passed as a str; Firefox likely expects an integer
# for this preference -- confirm.
profile.set_preference("network.proxy.http_port", port)
profile.update_preferences()

driver = webdriver.Firefox(firefox_profile=profile)
driver.get("https://whatismyipaddress.com/");

我把“change_proxy”打印出来的 ip 和端口分别输入到 Ip 和 Port 提示中，驱动程序打开了 ip 检查网站，但我看到的仍然是原来的 ip，没有任何变化：

和我原来的ip:

抱歉英语不好。

首先我要说的是,使用免费代理 IP 地址可能会有很大问题。 这些类型的代理因存在连接问题而臭名昭著,例如与延迟相关的超时。此外,这些站点也可能是间歇性的,这意味着它们随时可能出现故障。有时这些网站会被滥用,因此可能会被屏蔽。

我以前没有使用过 Python 包 http_request_randomizer,我注意到文档非常精简。我查看了包的源代码以了解其结构。

下面是一段代码,使用http_request_randomizer获取随机HTTPS代理,传给geckodriver使用

import random
import logging
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy

# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)

# Obtain a random single proxy from the list of proxy addresses
# (random.sample returns a one-element list, hence the [0] indexing below)
random_proxy = random.sample(req_proxy.get_proxy_list(), 1)

firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the user agent string through the Firefox preference
# 'general.useragent.override'. The preference is set on firefox_options
# because that is the object handed to webdriver.Firefox below.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

firefox_capabilities = DesiredCapabilities().FIREFOX

# add the random proxy to firefox_capabilities
# ssl_proxy is the proxy used for HTTPS traffic
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = random_proxy[0].get_address()
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           desired_capabilities=firefox_capabilities)


# you can either do this
try:
    # print proxy IP for testing
    print(random_proxy[0].get_address())
    # output
    # 46.151.145.4:53281

    URL = 'http://www.expressvpn.com/what-is-my-ip'
    driver.get(URL)

# Some of the proxies pulled from http_request_randomizer will timeout
# for various reasons, so this exception is used to catch these timeouts
except TimeoutException:
    print("A Page load Timeout Occurred.")
    # close the browser session, since the page could not be loaded
    driver.quit()

# or this.  You can also put this in a try/except block and
# increase the timeout as needed.
#
# driver.set_page_load_timeout(120)
# URL = 'http://www.expressvpn.com/what-is-my-ip'
# driver.get(URL)

这是一张屏幕截图,显示 Firefox 在此会话中正确使用了 IP 地址。

如前所述，免费代理可能存在多种问题。下面的代码演示了如何使用 proxy judge 检查单个代理的状态。

import random
import logging
from time import sleep
from random import randint
from proxy_checking import ProxyChecker
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy


def random_ssl_proxy_address():
    """Return the address of one randomly selected HTTPS proxy.

    A fresh proxy list is requested from RequestProxy on every call, and a
    single entry is drawn from it at random.
    """
    # ERROR log level keeps the library's debugging output off the console.
    proxy_source = RequestProxy(protocol=Protocol.HTTPS, log_level=logging.ERROR)
    candidates = proxy_source.get_proxy_list()

    # Draw exactly one proxy from the candidates.
    (chosen,) = random.sample(candidates, 1)
    return chosen.get_address()


def get_proxy_address():
    """Return a random HTTPS proxy address that passes the proxy-judge check.

    Draws a random proxy address, checks it with ProxyChecker, and repeats
    until the check result has a truthy 'status' value. Every failed attempt
    is followed by a random 5-10 second pause.

    Returns:
        The first proxy address whose check reports a truthy 'status'.

    Note:
        The loop has no attempt limit, and errors raised by the proxy list
        lookup or by the checker are not caught here.
    """
    # A loop (rather than recursion) guarantees that the valid address is
    # actually returned to the caller, however many retries were needed.
    while True:
        proxy_address = random_ssl_proxy_address()
        checker = ProxyChecker()
        proxy_judge = checker.check_proxy(proxy_address)

        # A missing 'status' key is treated the same as a failed check.
        if proxy_judge.get('status'):
            return proxy_address

        print('Looking for a valid proxy address.')

        # this sleep timer is helping with some timeout issues
        # that were happening when querying
        sleep(randint(5, 10))


# Look up a proxy address that passed the check and show it.
random_ssl_proxy = get_proxy_address()
print(f'Valid proxy address: {random_ssl_proxy}')
# output
# Valid proxy address: 98.116.152.143:3128

请注意，我使用的 proxy_checking 包没有任何内置的错误处理，因此您需要自行添加异常处理来捕获可能出现的错误。