使用 Python Requests 库将 unicode 字符串 POST 到 Web 服务
Post unicode string to web service using Python Requests library
我正在尝试使用 requests 库将一段包含特殊 unicode 符号的文本 POST 到 Web 服务。我使用的是 Python 3.5。
text = "Två dagar kvar"
r = requests.post("http://json-tagger.herokuapp.com/tag", data=text)
print(r.json()
我得到一个 UnicodeEncodeError,但我无法弄清楚我这边做错了什么。据我所见,requests 的文档只讨论了 GET 请求中的 unicode。
UnicodeEncodeError Traceback (most recent call last)
<ipython-input-125-3ebcae3d7918> in <module>()
19 print("cleaned : " + line)
20
---> 21 r = requests.post("http://json-tagger.herokuapp.com/tag", data=line)
22 sentences = r.json()['sentences']
23 for sentence in sentences:
//anaconda/lib/python3.4/site-packages/requests/api.py in post(url, data, json, **kwargs)
105 """
106
--> 107 return request('post', url, data=data, json=json, **kwargs)
108
109
//anaconda/lib/python3.4/site-packages/requests/api.py in request(method, url, **kwargs)
51 # cases, and look like a memory leak in others.
52 with sessions.Session() as session:
---> 53 return session.request(method=method, url=url, **kwargs)
54
55
//anaconda/lib/python3.4/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
466 }
467 send_kwargs.update(settings)
--> 468 resp = self.send(prep, **send_kwargs)
469
470 return resp
//anaconda/lib/python3.4/site-packages/requests/sessions.py in send(self, request, **kwargs)
574
575 # Send the request
--> 576 r = adapter.send(request, **kwargs)
577
578 # Total elapsed time of the request (approximately)
//anaconda/lib/python3.4/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
374 decode_content=False,
375 retries=self.max_retries,
--> 376 timeout=timeout
377 )
378
//anaconda/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, **response_kw)
557 httplib_response = self._make_request(conn, method, url,
558 timeout=timeout_obj,
--> 559 body=body, headers=headers)
560
561 # If we're going to release the connection in ``finally:``, then
//anaconda/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, **httplib_request_kw)
351 # conn.request() calls httplib.*.request, not the method in
352 # urllib3.request. It also calls makefile (recv) on the socket.
--> 353 conn.request(method, url, **httplib_request_kw)
354
355 # Reset the timeout for the recv() on the socket
//anaconda/lib/python3.4/http/client.py in request(self, method, url, body, headers)
1086 def request(self, method, url, body=None, headers={}):
1087 """Send a complete request to the server."""
-> 1088 self._send_request(method, url, body, headers)
1089
1090 def _set_content_length(self, body):
//anaconda/lib/python3.4/http/client.py in _send_request(self, method, url, body, headers)
1123 # RFC 2616 Section 3.7.1 says that text default has a
1124 # default charset of iso-8859-1.
-> 1125 body = body.encode('iso-8859-1')
1126 self.endheaders(body)
1127
UnicodeEncodeError: 'latin-1' codec can't encode characters in position 14-15: ordinal not in range(256)
解决方法:根据这个回答,我使用如下代码从文本中删除了 "emoticon" 块(U+1F600 - U+1F64F)以及符号和象形文字块(U+1F300 - U+1F5FF)中的所有 unicode 字符,因为我的分析不需要表情和图片:
text = re.sub(r'[^\u1F600-\u1F64F ]|[^\u1F300-\u1F5FF ]',"",text)
更新:Web 服务的创建者现已修复此问题并更新了文档。您只需发送一个编码后的字符串,在 Python 3 中:
""Två dagar kvar".encode("utf-8")
尚不清楚 json-tagger.herokuapp.com 期望什么内容类型(示例相互矛盾)。您可以尝试将数据作为文本 POST:
#!/usr/bin/env python
import requests # pip install requests
r = requests.post(url,
data=text.encode('utf-8'),
headers={'Content-type': 'text/plain; charset=utf-8'})
print(r.json())
或者您可以尝试将其作为 application/x-www-form-urlencoded 发送:
#!/usr/bin/env python
import requests # pip install requests
r = requests.post(url, data=dict(data=text))
print(r.json())
服务器可能两者都拒绝、两者都接受、只接受其中一个而不接受另一个,或者期望其他格式(例如 application/json)等。
我正在尝试使用 requests 库将一段包含特殊 unicode 符号的文本 POST 到 Web 服务。我使用的是 Python 3.5。
text = "Två dagar kvar"
r = requests.post("http://json-tagger.herokuapp.com/tag", data=text)
print(r.json()
我得到一个 UnicodeEncodeError,但我无法弄清楚我这边做错了什么。据我所见,requests 的文档只讨论了 GET 请求中的 unicode。
UnicodeEncodeError Traceback (most recent call last)
<ipython-input-125-3ebcae3d7918> in <module>()
19 print("cleaned : " + line)
20
---> 21 r = requests.post("http://json-tagger.herokuapp.com/tag", data=line)
22 sentences = r.json()['sentences']
23 for sentence in sentences:
//anaconda/lib/python3.4/site-packages/requests/api.py in post(url, data, json, **kwargs)
105 """
106
--> 107 return request('post', url, data=data, json=json, **kwargs)
108
109
//anaconda/lib/python3.4/site-packages/requests/api.py in request(method, url, **kwargs)
51 # cases, and look like a memory leak in others.
52 with sessions.Session() as session:
---> 53 return session.request(method=method, url=url, **kwargs)
54
55
//anaconda/lib/python3.4/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
466 }
467 send_kwargs.update(settings)
--> 468 resp = self.send(prep, **send_kwargs)
469
470 return resp
//anaconda/lib/python3.4/site-packages/requests/sessions.py in send(self, request, **kwargs)
574
575 # Send the request
--> 576 r = adapter.send(request, **kwargs)
577
578 # Total elapsed time of the request (approximately)
//anaconda/lib/python3.4/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
374 decode_content=False,
375 retries=self.max_retries,
--> 376 timeout=timeout
377 )
378
//anaconda/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, **response_kw)
557 httplib_response = self._make_request(conn, method, url,
558 timeout=timeout_obj,
--> 559 body=body, headers=headers)
560
561 # If we're going to release the connection in ``finally:``, then
//anaconda/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, **httplib_request_kw)
351 # conn.request() calls httplib.*.request, not the method in
352 # urllib3.request. It also calls makefile (recv) on the socket.
--> 353 conn.request(method, url, **httplib_request_kw)
354
355 # Reset the timeout for the recv() on the socket
//anaconda/lib/python3.4/http/client.py in request(self, method, url, body, headers)
1086 def request(self, method, url, body=None, headers={}):
1087 """Send a complete request to the server."""
-> 1088 self._send_request(method, url, body, headers)
1089
1090 def _set_content_length(self, body):
//anaconda/lib/python3.4/http/client.py in _send_request(self, method, url, body, headers)
1123 # RFC 2616 Section 3.7.1 says that text default has a
1124 # default charset of iso-8859-1.
-> 1125 body = body.encode('iso-8859-1')
1126 self.endheaders(body)
1127
UnicodeEncodeError: 'latin-1' codec can't encode characters in position 14-15: ordinal not in range(256)
解决方法:根据这个回答,我使用如下代码从文本中删除了 "emoticon" 块(U+1F600 - U+1F64F)以及符号和象形文字块(U+1F300 - U+1F5FF)中的所有 unicode 字符,因为我的分析不需要表情和图片:
text = re.sub(r'[^\u1F600-\u1F64F ]|[^\u1F300-\u1F5FF ]',"",text)
更新:Web 服务的创建者现已修复此问题并更新了文档。您只需发送一个编码后的字符串,在 Python 3 中:
""Två dagar kvar".encode("utf-8")
尚不清楚 json-tagger.herokuapp.com 期望什么内容类型(示例相互矛盾)。您可以尝试将数据作为文本 POST:
#!/usr/bin/env python
import requests # pip install requests
r = requests.post(url,
data=text.encode('utf-8'),
headers={'Content-type': 'text/plain; charset=utf-8'})
print(r.json())
或者您可以尝试将其作为 application/x-www-form-urlencoded 发送:
#!/usr/bin/env python
import requests # pip install requests
r = requests.post(url, data=dict(data=text))
print(r.json())
服务器可能两者都拒绝、两者都接受、只接受其中一个而不接受另一个,或者期望其他格式(例如 application/json)等。