BaseHTTPRequestHandler :多部分请求解析和 Content-length 问题
BaseHTTPRequestHandler : Multipart request parsing and Content-length issues
我在解析原始 HTTP 请求字符串并尝试确认计算出的 content-length 值时遇到困难。
解析包含多部分(multipart)数据的 POST 请求时发现问题。在这种情况下,Content-Length header 的值与我用 len(rfile.read()) 计算的值不同。
我想这一定与二进制内容的字符编码有关,但我还没有找到一种方法,能得到与 Content-Length HTTP header 所给出的值相同的结果。
以下脚本演示了该问题:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from BaseHTTPServer import BaseHTTPRequestHandler
from StringIO import StringIO
# Minimal plain-text POST request used as the passing case.
# The empty line after the headers is the separator HTTP requires before the
# body; the body "abc 123" is 7 characters long, matching Content-Length.
str_http = """POST /abc HTTP/1.1
Host: 127.0.0.1:80
Connection: close
Accept-Encoding: gzip, deflate
Accept: */*
User-Agent: python-requests/2.18.4
content-type: text/plain
Content-Length: 7

abc 123
"""
str2_http = """POST /spi/v2/events HTTP/1.1
User-Agent: Crashlytics Android SDK/1.3.8.127
X-CRASHLYTICS-DEVELOPER-TOKEN: XXXXXXXXXXX
X-CRASHLYTICS-API-CLIENT-TYPE: android
X-CRASHLYTICS-API-CLIENT-VERSION: 1.3.8.127
X-CRASHLYTICS-API-KEY: XXXXXXXXXXX
Content-Type: multipart/form-data; boundary=00content0boundary00
Host: e.crashlytics.com
Connection: Keep-Alive
Accept-Encoding: gzip
Content-Length: 776
--00content0boundary00
Content-Disposition: form-data; name="session_analytics_file_0"; filename="sa_32e4d6c3-adef-4cd5-a571-a68e4bee65f6_1542460750651.tap"
Content-Type: application/vnd.crashlytics.android.events
�Ko�0dz�p��@Z�\��3~do
�B-Uw˩�v�ռ�Gi��͡��.�r@�U�
b�R43{��i,����'~���σ`o�)Tu��/Mnߘpꪈ�j�U}e�D�4M�L��+���U�:�ƌ�
D����b� &�-5U����T��{]�R���,(K%K@�lS��{�f�ux�ʁ��`�;w���f�(}���R������[����ﴠ9�U� � К"I�A��I* ���^վ�M���᳃XĒ`�]�^:m+cs��ˋ��X����._���uӬj�Wr�|]o�{�e� ~`>R�`��G�
�QQ�3��19� e�s]����d�ΥS.oÙ���ܥ�U���s՞G��6�ζ�rm�������nB��[Cp^�'^{�CnGvI�w�����p�H�#;HÄ�Z��������=����c5�����$۶ ��%��c@i�U,
�p����������G�
s�����8�����aC
--00content0boundary00--
"""
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request held in memory instead of read from a socket.

    The constructor feeds the text to ``parse_request()``. If parsing
    reports a problem through ``send_error()``, the details are stored in
    ``error_code`` / ``error_message``; otherwise both remain ``None``.
    """

    def __init__(self, request_text):
        """Wrap *request_text* in a file-like object and parse it.

        The base-class constructor is not invoked; only ``rfile`` and
        ``raw_requestline`` are prepared before calling ``parse_request()``.
        """
        self.rfile = StringIO(request_text)
        # NOTE(review): io.BytesIO(request_text) was also tried here as an
        # alternative buffer.
        self.raw_requestline = self.rfile.readline()
        # Reset before parsing so a clean parse leaves both set to None.
        self.error_code = self.error_message = None
        self.parse_request()
        self.request_text = request_text

    def send_error(self, code, message=None):
        """Record the error instead of sending a response.

        ``message`` is optional, matching the base-class signature, so
        one-argument calls such as ``send_error(414)`` do not raise
        ``TypeError``.
        """
        self.error_code = code
        self.error_message = message
# Test 1 (passes): plain-text body whose length matches Content-Length.
request = HTTPRequest(str_http)
# Fall back to 0 when the Content-Length header is absent or empty.
content_length = int(request.headers.get('content-length') or 0)
# Remember the stream position left by parsing so the data can be re-read.
data_cursor = request.rfile.tell()
print request.rfile.read(content_length) # Prints the complete body
request.rfile.seek(data_cursor)
# strip() removes surrounding whitespace, e.g. the newline that ends the literal.
print len(request.rfile.read().strip()) == content_length # True
# Test 2 (fails): multipart body containing binary data.
request = HTTPRequest(str2_http)
content_length = int(request.headers.get('content-length') or 0)
data_cursor = request.rfile.tell()
print request.rfile.read(content_length) # Prints a truncated body
request.rfile.seek(data_cursor)
# The measured length differs from the Content-Length header value.
print len(request.rfile.read().strip()) == content_length # False
你知道如何正确计算这个值吗?
我的目标是能够计算出正确的数据长度,以便准确知道请求何时完成传送其内容。
如果其他人也遇到类似的问题,我终于想出了解决办法。正如我在之前的 post 中所怀疑的那样,这一切都归结为编码:非 ASCII 字符必须进行十六进制编码(例如:\xff)。
以下代码显示了一个带有问题字符串的工作示例:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from BaseHTTPServer import BaseHTTPRequestHandler
from StringIO import StringIO
str_http= """POST /spi/v2/events HTTP/1.1\r\nUser-Agent: Crashlytics Android SDK/1.3.8.127\r\nX-CRASHLYTICS-DEVELOPER-TOKEN: XXXXXX\r\nX-CRASHLYTICS-API-CLIENT-TYPE: android\r\nX-CRASHLYTICS-API-CLIENT-VERSION: 1.3.8.127\r\nX-CRASHLYTICS-API-KEY: XXXXXX\r\nContent-Type: multipart/form-data; boundary=00content0boundary00\r\nHost: e.crashlytics.com\r\nConnection: Keep-Alive\r\nAccept-Encoding: gzip\r\nContent-Length: 642\r\n\r\n--00content0boundary00\r\nContent-Disposition: form-data; name="session_analytics_file_0"; filename="sa_986e8438-14b5-44a5-a02e-6c7b1cce9b84_1543176379199.tap"\r\nContent-Type: application/vnd.crashlytics.android.events\r\n\r\n\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x00\xed\x91KN\xe40\x14E\x0bV\xc0\x12J\x19\x13\xe3$\xcf\x8eS\xb3\x82\xeeA\x0b\xd1\x03\xaa\xc4\x88\xc9\xf3\'`\xe18Q>|\x84\xd8{;\xa9\xa2%&,\x00\xf9H\x9e\xbc{\xaf}m\xafVg\xab\xc0IXg\x87uz\xf2\x9e`\xd7]N^;\xf3G\'\x9bD\xb5\rQ\xd8\xf7\xa6n\xa7\x9e\xd4V\x13\xf4\xbao\xadN\xce\x13\xf3j\xd44\xda\xd6/\xce\x1a\x18\x17\x14u\xca\xa8\x82\x14P\xb1T\x16\x08\xa9\x00\x9es\xceX-\x14\x84\x90\xf5\xc3\x88\xce\xe1\xff\%\xf3L\x96\x92i*\x18\xa0\xc9\x05\xab\xa0\x94\x14*j$\x18\xceC\xe6x\xe4b/h\t:\xe7\x90C\t\xc0\xc1\x04\xd9\xd9\xc6\x8e[\xbd\xefQ=Y\xff\xf0\xdb\xa3t&xkt\x839O\xe4d\xdd!\x0b\x8a\xb3\x02\xeb*E\x1a\x8a\x81Ry*r\xa1\xd3Z\xf1J\x8a,\xcb5\xcb\xc2~\xedpg\xfa!\xf4\x0b\x11N\xe8\xfdEQ\x16"\xa3"H\xda<[enZm\\x10\'\xff\xe4\xdb\x17\x7f\x7f\xb1=\x14\\xef~]\xaf\xe7\xe3\xc6u\xdd\xf6\xebW\xb1\x94\xef\xba\xe3vW!6\xdf\xa0\xa4_\xc6\x7f\xb1\x99\xc7\x82P2\x0b\xa3mLx\xa2\xa6K6\x19\x83"+yQV\x19\x88\xa0\xbcu\xb3q\xb7\xdf\xde\xee\x97.#Z7$\x9b\xf0gj\xb4\xcfv|\xfb\xee\xc3\xc8\xd09\x1c\x1e\xc9d\xc9\xd1n\xcd@v\xcbp\xfb\x99\xff\xf8XE"\x91H$\x12\x89D"\x91H\xe4\xa7\xf1\x0f\xcd}\xfe\x17\x00\x10\x00\x00\r\n--00content0boundary00--\r\n"""
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request held in memory instead of read from a socket.

    The constructor feeds the text to ``parse_request()``. If parsing
    reports a problem through ``send_error()``, the details are stored in
    ``error_code`` / ``error_message``; otherwise both remain ``None``.
    """

    def __init__(self, request_text):
        """Wrap *request_text* in a file-like object and parse it.

        The base-class constructor is not invoked; only ``rfile`` and
        ``raw_requestline`` are prepared before calling ``parse_request()``.
        """
        self.rfile = StringIO(request_text)
        self.raw_requestline = self.rfile.readline()
        # Reset before parsing so a clean parse leaves both set to None.
        self.error_code = self.error_message = None
        self.parse_request()
        self.request_text = request_text

    def send_error(self, code, message=None):
        """Record the error instead of sending a response.

        ``message`` is optional, matching the base-class signature, so
        one-argument calls such as ``send_error(414)`` do not raise
        ``TypeError``.
        """
        self.error_code = code
        self.error_message = message
# Parse the request whose binary payload is written with \x escapes.
request = HTTPRequest(str_http)
# Fall back to 0 when the Content-Length header is absent or empty.
content_length = int(request.headers.get('content-length') or 0)
# Remember the stream position left by parsing so the data can be re-read.
data_cursor = request.rfile.tell()
print request.rfile.read(content_length) # Prints the complete body
request.rfile.seek(data_cursor)
# No strip() here: the remaining data is measured exactly as stored.
print len(request.rfile.read()) == content_length # True
我在解析原始 HTTP 请求字符串并尝试确认计算出的 content-length 值时遇到困难。
解析包含多部分(multipart)数据的 POST 请求时发现问题。在这种情况下,Content-Length header 的值与我用 len(rfile.read()) 计算的值不同。 我想这一定与二进制内容的字符编码有关,但我还没有找到一种方法,能得到与 Content-Length HTTP header 所给出的值相同的结果。
以下脚本演示了该问题:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from BaseHTTPServer import BaseHTTPRequestHandler
from StringIO import StringIO
# Minimal plain-text POST request used as the passing case.
# The empty line after the headers is the separator HTTP requires before the
# body; the body "abc 123" is 7 characters long, matching Content-Length.
str_http = """POST /abc HTTP/1.1
Host: 127.0.0.1:80
Connection: close
Accept-Encoding: gzip, deflate
Accept: */*
User-Agent: python-requests/2.18.4
content-type: text/plain
Content-Length: 7

abc 123
"""
str2_http = """POST /spi/v2/events HTTP/1.1
User-Agent: Crashlytics Android SDK/1.3.8.127
X-CRASHLYTICS-DEVELOPER-TOKEN: XXXXXXXXXXX
X-CRASHLYTICS-API-CLIENT-TYPE: android
X-CRASHLYTICS-API-CLIENT-VERSION: 1.3.8.127
X-CRASHLYTICS-API-KEY: XXXXXXXXXXX
Content-Type: multipart/form-data; boundary=00content0boundary00
Host: e.crashlytics.com
Connection: Keep-Alive
Accept-Encoding: gzip
Content-Length: 776
--00content0boundary00
Content-Disposition: form-data; name="session_analytics_file_0"; filename="sa_32e4d6c3-adef-4cd5-a571-a68e4bee65f6_1542460750651.tap"
Content-Type: application/vnd.crashlytics.android.events
�Ko�0dz�p��@Z�\��3~do
�B-Uw˩�v�ռ�Gi��͡��.�r@�U�
b�R43{��i,����'~���σ`o�)Tu��/Mnߘpꪈ�j�U}e�D�4M�L��+���U�:�ƌ�
D����b� &�-5U����T��{]�R���,(K%K@�lS��{�f�ux�ʁ��`�;w���f�(}���R������[����ﴠ9�U� � К"I�A��I* ���^վ�M���᳃XĒ`�]�^:m+cs��ˋ��X����._���uӬj�Wr�|]o�{�e� ~`>R�`��G�
�QQ�3��19� e�s]����d�ΥS.oÙ���ܥ�U���s՞G��6�ζ�rm�������nB��[Cp^�'^{�CnGvI�w�����p�H�#;HÄ�Z��������=����c5�����$۶ ��%��c@i�U,
�p����������G�
s�����8�����aC
--00content0boundary00--
"""
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request held in memory instead of read from a socket.

    The constructor feeds the text to ``parse_request()``. If parsing
    reports a problem through ``send_error()``, the details are stored in
    ``error_code`` / ``error_message``; otherwise both remain ``None``.
    """

    def __init__(self, request_text):
        """Wrap *request_text* in a file-like object and parse it.

        The base-class constructor is not invoked; only ``rfile`` and
        ``raw_requestline`` are prepared before calling ``parse_request()``.
        """
        self.rfile = StringIO(request_text)
        # NOTE(review): io.BytesIO(request_text) was also tried here as an
        # alternative buffer.
        self.raw_requestline = self.rfile.readline()
        # Reset before parsing so a clean parse leaves both set to None.
        self.error_code = self.error_message = None
        self.parse_request()
        self.request_text = request_text

    def send_error(self, code, message=None):
        """Record the error instead of sending a response.

        ``message`` is optional, matching the base-class signature, so
        one-argument calls such as ``send_error(414)`` do not raise
        ``TypeError``.
        """
        self.error_code = code
        self.error_message = message
# Test 1 (passes): plain-text body whose length matches Content-Length.
request = HTTPRequest(str_http)
# Fall back to 0 when the Content-Length header is absent or empty.
content_length = int(request.headers.get('content-length') or 0)
# Remember the stream position left by parsing so the data can be re-read.
data_cursor = request.rfile.tell()
print request.rfile.read(content_length) # Prints the complete body
request.rfile.seek(data_cursor)
# strip() removes surrounding whitespace, e.g. the newline that ends the literal.
print len(request.rfile.read().strip()) == content_length # True
# Test 2 (fails): multipart body containing binary data.
request = HTTPRequest(str2_http)
content_length = int(request.headers.get('content-length') or 0)
data_cursor = request.rfile.tell()
print request.rfile.read(content_length) # Prints a truncated body
request.rfile.seek(data_cursor)
# The measured length differs from the Content-Length header value.
print len(request.rfile.read().strip()) == content_length # False
你知道如何正确计算这个值吗? 我的目标是能够计算出正确的数据长度,以便准确知道请求何时完成传送其内容。
如果其他人也遇到类似的问题,我终于想出了解决办法。正如我在之前的 post 中所怀疑的那样,这一切都归结为编码:非 ASCII 字符必须进行十六进制编码(例如:\xff)。
以下代码显示了一个带有问题字符串的工作示例:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from BaseHTTPServer import BaseHTTPRequestHandler
from StringIO import StringIO
str_http= """POST /spi/v2/events HTTP/1.1\r\nUser-Agent: Crashlytics Android SDK/1.3.8.127\r\nX-CRASHLYTICS-DEVELOPER-TOKEN: XXXXXX\r\nX-CRASHLYTICS-API-CLIENT-TYPE: android\r\nX-CRASHLYTICS-API-CLIENT-VERSION: 1.3.8.127\r\nX-CRASHLYTICS-API-KEY: XXXXXX\r\nContent-Type: multipart/form-data; boundary=00content0boundary00\r\nHost: e.crashlytics.com\r\nConnection: Keep-Alive\r\nAccept-Encoding: gzip\r\nContent-Length: 642\r\n\r\n--00content0boundary00\r\nContent-Disposition: form-data; name="session_analytics_file_0"; filename="sa_986e8438-14b5-44a5-a02e-6c7b1cce9b84_1543176379199.tap"\r\nContent-Type: application/vnd.crashlytics.android.events\r\n\r\n\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x00\xed\x91KN\xe40\x14E\x0bV\xc0\x12J\x19\x13\xe3$\xcf\x8eS\xb3\x82\xeeA\x0b\xd1\x03\xaa\xc4\x88\xc9\xf3\'`\xe18Q>|\x84\xd8{;\xa9\xa2%&,\x00\xf9H\x9e\xbc{\xaf}m\xafVg\xab\xc0IXg\x87uz\xf2\x9e`\xd7]N^;\xf3G\'\x9bD\xb5\rQ\xd8\xf7\xa6n\xa7\x9e\xd4V\x13\xf4\xbao\xadN\xce\x13\xf3j\xd44\xda\xd6/\xce\x1a\x18\x17\x14u\xca\xa8\x82\x14P\xb1T\x16\x08\xa9\x00\x9es\xceX-\x14\x84\x90\xf5\xc3\x88\xce\xe1\xff\%\xf3L\x96\x92i*\x18\xa0\xc9\x05\xab\xa0\x94\x14*j$\x18\xceC\xe6x\xe4b/h\t:\xe7\x90C\t\xc0\xc1\x04\xd9\xd9\xc6\x8e[\xbd\xefQ=Y\xff\xf0\xdb\xa3t&xkt\x839O\xe4d\xdd!\x0b\x8a\xb3\x02\xeb*E\x1a\x8a\x81Ry*r\xa1\xd3Z\xf1J\x8a,\xcb5\xcb\xc2~\xedpg\xfa!\xf4\x0b\x11N\xe8\xfdEQ\x16"\xa3"H\xda<[enZm\\x10\'\xff\xe4\xdb\x17\x7f\x7f\xb1=\x14\\xef~]\xaf\xe7\xe3\xc6u\xdd\xf6\xebW\xb1\x94\xef\xba\xe3vW!6\xdf\xa0\xa4_\xc6\x7f\xb1\x99\xc7\x82P2\x0b\xa3mLx\xa2\xa6K6\x19\x83"+yQV\x19\x88\xa0\xbcu\xb3q\xb7\xdf\xde\xee\x97.#Z7$\x9b\xf0gj\xb4\xcfv|\xfb\xee\xc3\xc8\xd09\x1c\x1e\xc9d\xc9\xd1n\xcd@v\xcbp\xfb\x99\xff\xf8XE"\x91H$\x12\x89D"\x91H\xe4\xa7\xf1\x0f\xcd}\xfe\x17\x00\x10\x00\x00\r\n--00content0boundary00--\r\n"""
class HTTPRequest(BaseHTTPRequestHandler):
    """Parse a raw HTTP request held in memory instead of read from a socket.

    The constructor feeds the text to ``parse_request()``. If parsing
    reports a problem through ``send_error()``, the details are stored in
    ``error_code`` / ``error_message``; otherwise both remain ``None``.
    """

    def __init__(self, request_text):
        """Wrap *request_text* in a file-like object and parse it.

        The base-class constructor is not invoked; only ``rfile`` and
        ``raw_requestline`` are prepared before calling ``parse_request()``.
        """
        self.rfile = StringIO(request_text)
        self.raw_requestline = self.rfile.readline()
        # Reset before parsing so a clean parse leaves both set to None.
        self.error_code = self.error_message = None
        self.parse_request()
        self.request_text = request_text

    def send_error(self, code, message=None):
        """Record the error instead of sending a response.

        ``message`` is optional, matching the base-class signature, so
        one-argument calls such as ``send_error(414)`` do not raise
        ``TypeError``.
        """
        self.error_code = code
        self.error_message = message
# Parse the request whose binary payload is written with \x escapes.
request = HTTPRequest(str_http)
# Fall back to 0 when the Content-Length header is absent or empty.
content_length = int(request.headers.get('content-length') or 0)
# Remember the stream position left by parsing so the data can be re-read.
data_cursor = request.rfile.tell()
print request.rfile.read(content_length) # Prints the complete body
request.rfile.seek(data_cursor)
# No strip() here: the remaining data is measured exactly as stored.
print len(request.rfile.read()) == content_length # True