Python 请求库中的猴子补丁 _ssl_wrap_socket 未执行
Monkey patching _ssl_wrap_socket in Python requests library isn't executing
我们正在尝试为 Web 服务器虚拟主机扫描工具添加 HTTPS 支持。所述工具使用 python3 请求库,该库在底层使用 urllib3。
我们需要一种方法来提供我们自己的 SNI 主机名,因此正在尝试通过猴子修补 urllib3 的 _ssl_wrap_socket
函数来控制 server_hostname
,但收效甚微。
完整代码如下:
from urllib3.util import ssl_
_target_host = None
_orig_wrap_socket = ssl_.ssl_wrap_socket
def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
_orig_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
cert_reqs=cert_reqs, ca_certs=ca_certs,
server_hostname=_target_host, ssl_version=ssl_version,
ciphers=ciphers, ssl_context=ssl_context,
ca_cert_dir=ca_cert_dir)
ssl_.ssl_wrap_socket = _ssl_wrap_socket
然后我们在代码中进一步调用 requests.get()
。可以在 Github (here).
上找到完整的上下文
不幸的是,这不起作用,因为我们的代码似乎从未被访问过,我们也不确定为什么。是否有明显的遗漏或解决此问题的更好方法?
进一步说明
以下是完整的class:
import os
import random
import requests
import hashlib
import pandas as pd
import time
from lib.core.discovered_host import *
import urllib3
DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '\
'AppleWebKit/537.36 (KHTML, like Gecko) '\
'Chrome/61.0.3163.100 Safari/537.36'
urllib3.disable_warnings()
from urllib3.util import ssl_
class virtual_host_scanner(object):
"""Virtual host scanning class
Virtual host scanner has the following properties:
Attributes:
wordlist: location to a wordlist file to use with scans
target: the target for scanning
port: the port to scan. Defaults to 80
ignore_http_codes: commad seperated list of http codes to ignore
ignore_content_length: integer value of content length to ignore
output: folder to write output file to
"""
def __init__(self, target, wordlist, **kwargs):
self.target = target
self.wordlist = wordlist
self.base_host = kwargs.get('base_host')
self.rate_limit = int(kwargs.get('rate_limit', 0))
self.port = int(kwargs.get('port', 80))
self.real_port = int(kwargs.get('real_port', 80))
self.ssl = kwargs.get('ssl', False)
self.fuzzy_logic = kwargs.get('fuzzy_logic', False)
self.unique_depth = int(kwargs.get('unique_depth', 1))
self.ignore_http_codes = kwargs.get('ignore_http_codes', '404')
self.first_hit = kwargs.get('first_hit')
self.ignore_content_length = int(
kwargs.get('ignore_content_length', 0)
)
self.add_waf_bypass_headers = kwargs.get(
'add_waf_bypass_headers',
False
)
# this can be made redundant in future with better exceptions
self.completed_scan = False
# this is maintained until likely-matches is refactored to use
# new class
self.results = []
# store associated data for discovered hosts
# in array for oN, oJ, etc'
self.hosts = []
# available user-agents
self.user_agents = list(kwargs.get('user_agents')) \
or [DEFAULT_USER_AGENT]
@property
def ignore_http_codes(self):
return self._ignore_http_codes
@ignore_http_codes.setter
def ignore_http_codes(self, codes):
self._ignore_http_codes = [
int(code) for code in codes.replace(' ', '').split(',')
]
_target_host = None
_orig_wrap_socket = ssl_.ssl_wrap_socket
def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
print('SHOULD BE PRINTED')
_orig_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
cert_reqs=cert_reqs, ca_certs=ca_certs,
server_hostname=_target_host, ssl_version=ssl_version,
ciphers=ciphers, ssl_context=ssl_context,
ca_cert_dir=ca_cert_dir)
def scan(self):
print('fdsa')
ssl_.ssl_wrap_socket = self._ssl_wrap_socket
if not self.base_host:
self.base_host = self.target
if not self.real_port:
self.real_port = self.port
for virtual_host in self.wordlist:
hostname = virtual_host.replace('%s', self.base_host)
if self.real_port == 80:
host_header = hostname
else:
host_header = '{}:{}'.format(hostname, self.real_port)
headers = {
'User-Agent': random.choice(self.user_agents),
'Host': host_header,
'Accept': '*/*'
}
if self.add_waf_bypass_headers:
headers.update({
'X-Originating-IP': '127.0.0.1',
'X-Forwarded-For': '127.0.0.1',
'X-Remote-IP': '127.0.0.1',
'X-Remote-Addr': '127.0.0.1'
})
dest_url = '{}://{}:{}/'.format(
'https' if self.ssl else 'http',
self.target,
self.port
)
_target_host = hostname
try:
res = requests.get(dest_url, headers=headers, verify=False)
except requests.exceptions.RequestException:
continue
if res.status_code in self.ignore_http_codes:
continue
response_length = int(res.headers.get('content-length', 0))
if self.ignore_content_length and \
self.ignore_content_length == response_length:
continue
# hash the page results to aid in identifing unique content
page_hash = hashlib.sha256(res.text.encode('utf-8')).hexdigest()
self.hosts.append(self.create_host(res, hostname, page_hash))
# add url and hash into array for likely matches
self.results.append(hostname + ',' + page_hash)
if len(self.hosts) >= 1 and self.first_hit:
break
# rate limit the connection, if the int is 0 it is ignored
time.sleep(self.rate_limit)
self.completed_scan = True
def likely_matches(self):
if self.completed_scan is False:
print("[!] Likely matches cannot be printed "
"as a scan has not yet been run.")
return
# segment results from previous scan into usable results
segmented_data = {}
for item in self.results:
result = item.split(",")
segmented_data[result[0]] = result[1]
dataframe = pd.DataFrame([
[key, value] for key, value in segmented_data.items()],
columns=["key_col", "val_col"]
)
segmented_data = dataframe.groupby("val_col").filter(
lambda x: len(x) <= self.unique_depth
)
return segmented_data["key_col"].values.tolist()
def create_host(self, response, hostname, page_hash):
"""
Creates a host using the responce and the hash.
Prints current result in real time.
"""
output = '[#] Found: {} (code: {}, length: {}, hash: {})\n'.format(
hostname,
response.status_code,
response.headers.get('content-length'),
page_hash
)
host = discovered_host()
host.hostname = hostname
host.response_code = response.status_code
host.hash = page_hash
host.contnet = response.content
for key, val in response.headers.items():
output += ' {}: {}\n'.format(key, val)
host.keys.append('{}: {}'.format(key, val))
print(output)
return host
在这种情况下,永远不会命中以下行:
print('SHOULD BE PRINTED')
这还会导致 Web 服务器上出现以下日志条目:
[Wed Oct 25 16:37:23.654321 2017] [ssl:error] [pid 1355] AH02032:
Hostname provided via SNI and hostname test.test provided via
HTTP are different
这表明代码从未 运行 也。
Edit-1:无需重新加载
感谢@MartijnPieters 帮助我完善答案。如果我们直接修补 urllib3.connection
,则不需要重新加载。但是 requests 包在最新版本中有一些变化,这使得原来的答案在某些版本的 requests 上不起作用。
这是代码的更新版本,可以处理所有这些事情
import requests
try:
assert requests.__version__ != "2.18.0"
import requests.packages.urllib3.util.ssl_ as ssl_
import requests.packages.urllib3.connection as connection
except (ImportError,AssertionError,AttributeError):
import urllib3.util.ssl_ as ssl_
import urllib3.connection as connection
print("Using " + requests.__version__)
def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
print('SHOULD BE PRINTED')
return ssl_.ssl_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
cert_reqs=cert_reqs, ca_certs=ca_certs,
server_hostname=server_hostname, ssl_version=ssl_version,
ciphers=ciphers, ssl_context=ssl_context,
ca_cert_dir=ca_cert_dir)
connection.ssl_wrap_socket = _ssl_wrap_socket
res = requests.get("https://www.google.com", verify=True)
代码也可以在
https://github.com/tarunlalwani/monkey-patch-ssl_wrap_socket
原答案
你的代码中有两个问题。
requests
实际上并不直接导入 urllib3
。它使用 requests.packages
通过自己的上下文来完成
所以你要覆盖的套接字是
requests.packages.urllib3.util.ssl_.ssl_wrap_socket
接下来如果你从 urllib3/connection.py
查看 connection.py
from .util.ssl_ import (
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
assert_fingerprint,
)
这是本地导入,第一次尝试时无法覆盖它,因为我们使用 import requests
时会加载代码。您可以通过放置断点并检查堆栈跟踪返回父文件来轻松确认。
所以为了让猴子补丁工作,我们需要在补丁完成后重新加载模块,所以它需要我们的补丁函数
下面是显示拦截以这种方式工作的最小代码
try:
reload # Python 2.7
except NameError:
try:
from importlib import reload # Python 3.4+
except ImportError:
from imp import reload # Python 3.0 - 3.3
def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
print('SHOULD BE PRINTED')
_orig_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
cert_reqs=cert_reqs, ca_certs=ca_certs,
server_hostname=_target_host, ssl_version=ssl_version,
ciphers=ciphers, ssl_context=ssl_context,
ca_cert_dir=ca_cert_dir)
import requests
_orig_wrap_socket = requests.packages.urllib3.util.ssl_.ssl_wrap_socket
requests.packages.urllib3.util.ssl_.ssl_wrap_socket = _ssl_wrap_socket
reload(requests.packages.urllib3.connection)
res = requests.get("https://www.google.com", verify=True)
我们正在尝试为 Web 服务器虚拟主机扫描工具添加 HTTPS 支持。所述工具使用 python3 请求库,该库在底层使用 urllib3。
我们需要一种方法来提供我们自己的 SNI 主机名,因此正在尝试通过猴子修补 urllib3 的 _ssl_wrap_socket
函数来控制 server_hostname
,但收效甚微。
完整代码如下:
from urllib3.util import ssl_
_target_host = None
_orig_wrap_socket = ssl_.ssl_wrap_socket
def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
_orig_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
cert_reqs=cert_reqs, ca_certs=ca_certs,
server_hostname=_target_host, ssl_version=ssl_version,
ciphers=ciphers, ssl_context=ssl_context,
ca_cert_dir=ca_cert_dir)
ssl_.ssl_wrap_socket = _ssl_wrap_socket
然后我们在代码中进一步调用 requests.get()
。可以在 Github (here).
不幸的是,这不起作用,因为我们的代码似乎从未被访问过,我们也不确定为什么。是否有明显的遗漏或解决此问题的更好方法?
进一步说明
以下是完整的class:
import os
import random
import requests
import hashlib
import pandas as pd
import time
from lib.core.discovered_host import *
import urllib3
DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '\
'AppleWebKit/537.36 (KHTML, like Gecko) '\
'Chrome/61.0.3163.100 Safari/537.36'
urllib3.disable_warnings()
from urllib3.util import ssl_
class virtual_host_scanner(object):
"""Virtual host scanning class
Virtual host scanner has the following properties:
Attributes:
wordlist: location to a wordlist file to use with scans
target: the target for scanning
port: the port to scan. Defaults to 80
ignore_http_codes: commad seperated list of http codes to ignore
ignore_content_length: integer value of content length to ignore
output: folder to write output file to
"""
def __init__(self, target, wordlist, **kwargs):
self.target = target
self.wordlist = wordlist
self.base_host = kwargs.get('base_host')
self.rate_limit = int(kwargs.get('rate_limit', 0))
self.port = int(kwargs.get('port', 80))
self.real_port = int(kwargs.get('real_port', 80))
self.ssl = kwargs.get('ssl', False)
self.fuzzy_logic = kwargs.get('fuzzy_logic', False)
self.unique_depth = int(kwargs.get('unique_depth', 1))
self.ignore_http_codes = kwargs.get('ignore_http_codes', '404')
self.first_hit = kwargs.get('first_hit')
self.ignore_content_length = int(
kwargs.get('ignore_content_length', 0)
)
self.add_waf_bypass_headers = kwargs.get(
'add_waf_bypass_headers',
False
)
# this can be made redundant in future with better exceptions
self.completed_scan = False
# this is maintained until likely-matches is refactored to use
# new class
self.results = []
# store associated data for discovered hosts
# in array for oN, oJ, etc'
self.hosts = []
# available user-agents
self.user_agents = list(kwargs.get('user_agents')) \
or [DEFAULT_USER_AGENT]
@property
def ignore_http_codes(self):
return self._ignore_http_codes
@ignore_http_codes.setter
def ignore_http_codes(self, codes):
self._ignore_http_codes = [
int(code) for code in codes.replace(' ', '').split(',')
]
_target_host = None
_orig_wrap_socket = ssl_.ssl_wrap_socket
def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
print('SHOULD BE PRINTED')
_orig_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
cert_reqs=cert_reqs, ca_certs=ca_certs,
server_hostname=_target_host, ssl_version=ssl_version,
ciphers=ciphers, ssl_context=ssl_context,
ca_cert_dir=ca_cert_dir)
def scan(self):
print('fdsa')
ssl_.ssl_wrap_socket = self._ssl_wrap_socket
if not self.base_host:
self.base_host = self.target
if not self.real_port:
self.real_port = self.port
for virtual_host in self.wordlist:
hostname = virtual_host.replace('%s', self.base_host)
if self.real_port == 80:
host_header = hostname
else:
host_header = '{}:{}'.format(hostname, self.real_port)
headers = {
'User-Agent': random.choice(self.user_agents),
'Host': host_header,
'Accept': '*/*'
}
if self.add_waf_bypass_headers:
headers.update({
'X-Originating-IP': '127.0.0.1',
'X-Forwarded-For': '127.0.0.1',
'X-Remote-IP': '127.0.0.1',
'X-Remote-Addr': '127.0.0.1'
})
dest_url = '{}://{}:{}/'.format(
'https' if self.ssl else 'http',
self.target,
self.port
)
_target_host = hostname
try:
res = requests.get(dest_url, headers=headers, verify=False)
except requests.exceptions.RequestException:
continue
if res.status_code in self.ignore_http_codes:
continue
response_length = int(res.headers.get('content-length', 0))
if self.ignore_content_length and \
self.ignore_content_length == response_length:
continue
# hash the page results to aid in identifing unique content
page_hash = hashlib.sha256(res.text.encode('utf-8')).hexdigest()
self.hosts.append(self.create_host(res, hostname, page_hash))
# add url and hash into array for likely matches
self.results.append(hostname + ',' + page_hash)
if len(self.hosts) >= 1 and self.first_hit:
break
# rate limit the connection, if the int is 0 it is ignored
time.sleep(self.rate_limit)
self.completed_scan = True
def likely_matches(self):
if self.completed_scan is False:
print("[!] Likely matches cannot be printed "
"as a scan has not yet been run.")
return
# segment results from previous scan into usable results
segmented_data = {}
for item in self.results:
result = item.split(",")
segmented_data[result[0]] = result[1]
dataframe = pd.DataFrame([
[key, value] for key, value in segmented_data.items()],
columns=["key_col", "val_col"]
)
segmented_data = dataframe.groupby("val_col").filter(
lambda x: len(x) <= self.unique_depth
)
return segmented_data["key_col"].values.tolist()
def create_host(self, response, hostname, page_hash):
"""
Creates a host using the responce and the hash.
Prints current result in real time.
"""
output = '[#] Found: {} (code: {}, length: {}, hash: {})\n'.format(
hostname,
response.status_code,
response.headers.get('content-length'),
page_hash
)
host = discovered_host()
host.hostname = hostname
host.response_code = response.status_code
host.hash = page_hash
host.contnet = response.content
for key, val in response.headers.items():
output += ' {}: {}\n'.format(key, val)
host.keys.append('{}: {}'.format(key, val))
print(output)
return host
在这种情况下,永远不会命中以下行:
print('SHOULD BE PRINTED')
这还会导致 Web 服务器上出现以下日志条目:
[Wed Oct 25 16:37:23.654321 2017] [ssl:error] [pid 1355] AH02032: Hostname provided via SNI and hostname test.test provided via HTTP are different
这表明代码从未 运行 也。
Edit-1:无需重新加载
感谢@MartijnPieters 帮助我完善答案。如果我们直接修补 urllib3.connection
,则不需要重新加载。但是 requests 包在最新版本中有一些变化,这使得原来的答案在某些版本的 requests 上不起作用。
这是代码的更新版本,可以处理所有这些事情
import requests
try:
assert requests.__version__ != "2.18.0"
import requests.packages.urllib3.util.ssl_ as ssl_
import requests.packages.urllib3.connection as connection
except (ImportError,AssertionError,AttributeError):
import urllib3.util.ssl_ as ssl_
import urllib3.connection as connection
print("Using " + requests.__version__)
def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
print('SHOULD BE PRINTED')
return ssl_.ssl_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
cert_reqs=cert_reqs, ca_certs=ca_certs,
server_hostname=server_hostname, ssl_version=ssl_version,
ciphers=ciphers, ssl_context=ssl_context,
ca_cert_dir=ca_cert_dir)
connection.ssl_wrap_socket = _ssl_wrap_socket
res = requests.get("https://www.google.com", verify=True)
代码也可以在
https://github.com/tarunlalwani/monkey-patch-ssl_wrap_socket
原答案
你的代码中有两个问题。
requests
实际上并不直接导入 urllib3
。它使用 requests.packages
所以你要覆盖的套接字是
requests.packages.urllib3.util.ssl_.ssl_wrap_socket
接下来如果你从 urllib3/connection.py
connection.py
from .util.ssl_ import (
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
assert_fingerprint,
)
这是本地导入,第一次尝试时无法覆盖它,因为我们使用 import requests
时会加载代码。您可以通过放置断点并检查堆栈跟踪返回父文件来轻松确认。
所以为了让猴子补丁工作,我们需要在补丁完成后重新加载模块,所以它需要我们的补丁函数
下面是显示拦截以这种方式工作的最小代码
try:
reload # Python 2.7
except NameError:
try:
from importlib import reload # Python 3.4+
except ImportError:
from imp import reload # Python 3.0 - 3.3
def _ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
print('SHOULD BE PRINTED')
_orig_wrap_socket(sock, keyfile=keyfile, certfile=certfile,
cert_reqs=cert_reqs, ca_certs=ca_certs,
server_hostname=_target_host, ssl_version=ssl_version,
ciphers=ciphers, ssl_context=ssl_context,
ca_cert_dir=ca_cert_dir)
import requests
_orig_wrap_socket = requests.packages.urllib3.util.ssl_.ssl_wrap_socket
requests.packages.urllib3.util.ssl_.ssl_wrap_socket = _ssl_wrap_socket
reload(requests.packages.urllib3.connection)
res = requests.get("https://www.google.com", verify=True)