Cannot connect to proxy error on requests.get() or requests.post() in python
I have two URLs to fetch data from. With my code, the first URL works, whereas the second URL gives a ProxyError.
I'm using the requests library in Python 3 and have tried searching for the problem on Google and here, but with no luck.
My code snippet is:
import requests
proxies = {
'http': 'http://user:pass@xxx.xxx.xxx.xxx:xxxx',
'https': 'http://user:pass@xxx.xxx.xxx.xxx:xxxx',
}
url1 = 'https://en.oxforddictionaries.com/definition/act'
url2 = 'https://dictionary.cambridge.org/dictionary/english/act'
r1 = requests.get(url1, proxies=proxies)
r2 = requests.get(url2, proxies=proxies)
url1 works fine, but url2 gives the following error:
ProxyError: HTTPSConnectionPool(host='dictionary.cambridge.org', port=443): Max retries exceeded with url: /dictionary/english/act (Caused by ProxyError('Cannot connect to proxy.', RemoteDisconnected('Remote end closed connection without response',)))
The same happens when using requests.post().
Please explain why this happens. Is there some difference in the handshake for the two URLs?
urllib.request.urlopen works fine, so I'm explicitly looking for an answer using requests.
I was able to elicit a valid response for url2 by using the headers keyword argument and setting the User-Agent string to Chrome:
r2 = requests.get(url2, proxies=proxies, headers={'User-Agent': 'Chrome'})
To answer your first question, a likely reason why this happens relates to server-side settings: the server may be configured to reject requests that come through unknown proxies or that lack a User-Agent header.
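As a quick illustration only (a sketch; the full User-Agent string below is just an example value, not something the site requires verbatim), a more browser-like header set can be sent the same way:

import requests

proxies = {
    'http': 'http://user:pass@xxx.xxx.xxx.xxx:xxxx',
    'https': 'http://user:pass@xxx.xxx.xxx.xxx:xxxx',
}

# A fuller browser-style User-Agent; the short 'Chrome' value above was
# already enough to get a valid response, so this is belt-and-braces.
headers = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/96.0.4664.110 Safari/537.36'),
}

url2 = 'https://dictionary.cambridge.org/dictionary/english/act'
r2 = requests.get(url2, proxies=proxies, headers=headers, timeout=10)
print(r2.status_code)

The longer script below applies the same idea to nmpa.gov.cn, pulling a random proxy from a local proxy pool: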
import re
import requests
import json
from bs4 import BeautifulSoup
import pymysql
import time, datetime
import os
from requests.adapters import HTTPAdapter
def get_random_proxy():
    """
    get random proxy from proxypool
    :return: proxy address as 'host:port'
    """
    proxypool_url = 'http://127.0.0.1:5555/random'
    return requests.get(proxypool_url).text.strip()
headers = {
    'User-Agent': 'Chrome',
    'Referer': 'https://www.nmpa.gov.cn/datasearch/home-index.html?79QlcAyHig6m=1636513393895',
    'Host': 'nmpa.gov.cn',
    'Origin': 'https://nmpa.gov.cn',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Connection': 'close'
}
url = 'https://www.nmpa.gov.cn/datasearch/search-result.html'
def start_requests(coo):
    # r = json.loads(r.text)
    # Forward the cookie obtained during the handshake request; 'Cookie' is
    # the request-side header ('Set-Cookie' is only ever sent by servers).
    headers['Cookie'] = coo
    s = requests.get(url=url, headers=headers, stream=True, timeout=(5, 5), verify=False)
    s.encoding = 'utf8'
    print(s.status_code)
    print(s)
while True:
    # Note: two separate pool lookups, so the HTTP and HTTPS entries may
    # end up pointing at different proxies.
    proxy = {'http': 'http://' + get_random_proxy(), 'https': 'https://' + get_random_proxy()}
    print(proxy)
    try:
        sess = requests.Session()
        sess.keep_alive = False  # close redundant connections
        res = sess.get(url='https://nmpa.gov.cn', headers={'User-Agent': 'Chrome'}, proxies=proxy,
                       timeout=10, verify=False)
        res.close()
        print(res.status_code)
        res.encoding = 'utf8'
        cookie = res.headers['Set-Cookie']
        print(cookie)
        if res.status_code == 200:
            print(res.status_code)
            time.sleep(10)
            start_requests(cookie)
            break
    except Exception as error:
        time.sleep(10)
        print("Connection failed:", error)