simplejson.scanner.JSONDecodeError: Invalid \X escape sequence u's': line 1 column 468 (char 467)
simplejson.scanner.JSONDecodeError: Invalid \X escape sequence u's': line 1 column 468 (char 467)
我在向 REST API 发出 HTTP 请求时遇到了问题:返回的 JSON 响应格式不正确。JSON 中的其他转义序列都能被正确解析,但其中有一个句子包含 "inner\spiritual",导致 JSON 解码器把 "\s" 当成了转义序列,而它实际上并不是合法的转义序列。
我在 Stack Overflow 上搜索了相关帖子,没有找到完全适合我这种情况的解决方案,于是想出了一个不错的 hack:利用异常信息中给出的字符索引,在 JSON 字符串中做一次简单替换。我觉得值得分享,因为它可能会帮到处于类似情况的人。Happy hacking :)
完整代码:
import requests
import os
import json
# Endpoint queried for each IMDb id.
base_url = 'http://www.omdbapi.com/'
# Parsed API responses keyed by title, split by the 'Type' the API reports.
tv_series = {}
films = {}

# Reproduction case: a single id whose response triggers the decode error.
# (The full crawl iterated i over range(1, 9999999) and built the id as
# 'tt' + str(i).zfill(7).)
imdb_id = 'tt0120690'
print(imdb_id)

payload = {
    'i': imdb_id,
    'plot': 'full',
    'r': 'json',
}
response = requests.get(base_url, params=payload)
if response.status_code == 200:
    # This is the call that raises JSONDecodeError on the malformed body.
    result = response.json()
    if result is not None and result['Response'] != 'False':
        if result['Type'] == 'movie':
            films[result['Title']] = result
        elif result['Type'] == 'series':
            tv_series[result['Title']] = result
        else:
            # %-formatting prints the same text on Python 2 and Python 3.
            print('[ERROR] Type: %s' % result['Type'])

# Persist whatever was collected, one JSON file per category.
with open('tv_series.json', 'w') as tv_series_outfile:
    json.dump(tv_series, tv_series_outfile)
with open('films.json', 'w') as films_outfile:
    json.dump(films, films_outfile)
错误:
Traceback (most recent call last):
File "import_imdb.py", line 41, in <module>
result = response.json()
File "C:\Python27\lib\site-packages\requests\models.py", line 797, in json
return json.loads(self.text, **kwargs)
File "C:\Python27\lib\site-packages\simplejson\__init__.py", line 516, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\site-packages\simplejson\decoder.py", line 370, in decode
obj, end = self.raw_decode(s)
File "C:\Python27\lib\site-packages\simplejson\decoder.py", line 400, in raw_decode
return self.scan_once(s, idx=_w(s, idx).end())
File "C:\Python27\lib\site-packages\simplejson\scanner.py", line 127, in scan_once
return _scan_once(string, idx)
File "C:\Python27\lib\site-packages\simplejson\scanner.py", line 93, in _scan_once
_scan_once, object_hook, object_pairs_hook, memo)
File "C:\Python27\lib\site-packages\simplejson\decoder.py", line 194, in JSONObject
value, end = scan_once(s, end)
File "C:\Python27\lib\site-packages\simplejson\scanner.py", line 90, in _scan_once
return parse_string(string, idx + 1, encoding, strict)
File "C:\Python27\lib\site-packages\simplejson\decoder.py", line 99, in py_scanstring
raise JSONDecodeError(msg, s, end)
simplejson.scanner.JSONDecodeError: Invalid \X escape sequence u's': line 1 column 468 (char 467)
我的修复:
def fix_JSON(json_message=None):
    """Parse *json_message*, blanking out characters the JSON decoder rejects.

    Each time decoding fails, the character at the offset reported by the
    decoder is replaced with a single space and decoding is retried.  This
    is a best-effort repair for bodies containing stray backslashes such as
    ``inner\\spiritual``; the replaced character is lost.

    Args:
        json_message: JSON text.  ``bytes`` (as returned by
            ``response.content`` on Python 3) are decoded first.

    Returns:
        The decoded Python object.

    Raises:
        ValueError: the decoder's own error, when the text cannot be
            repaired (offset unknown, offset past the end of the text, or
            the offending character is already a space).
        TypeError: if *json_message* is not text or bytes (e.g. ``None``).
    """
    # On Python 2 ``bytes`` is ``str``, so this branch only runs on Python 3.
    if isinstance(json_message, bytes) and not isinstance(json_message, str):
        # NOTE(review): assumes the body is UTF-8 encoded - confirm for the API.
        json_message = json_message.decode('utf-8')

    # Loop rather than recurse so a body with many bad characters cannot
    # exhaust the interpreter's recursion limit.
    while True:
        try:
            return json.loads(json_message)
        except ValueError as error:
            # Decoder errors subclass ValueError.  Newer decoders expose the
            # offset as ``pos``; older ones only embed "(char N)" in the text.
            position = getattr(error, 'pos', None)
            if position is None:
                _, marker, tail = str(error).rpartition('(char ')
                digits = tail.split(' ')[0].rstrip(')')
                if marker and digits.isdigit():
                    position = int(digits)
            can_patch = (
                position is not None
                and 0 <= position < len(json_message)
                and json_message[position] != ' '
            )
            if not can_patch:
                # No progress is possible; surface the decoder's error
                # instead of retrying forever.
                raise
            json_message = (
                json_message[:position] + ' ' + json_message[position + 1:]
            )
完整代码:
import requests
import os
import json
def fix_JSON(json_message=None):
    """Parse *json_message*, blanking out characters the JSON decoder rejects.

    Each time decoding fails, the character at the offset reported by the
    decoder is replaced with a single space and decoding is retried.  This
    is a best-effort repair for bodies containing stray backslashes such as
    ``inner\\spiritual``; the replaced character is lost.

    Args:
        json_message: JSON text.  ``bytes`` (as returned by
            ``response.content`` on Python 3) are decoded first.

    Returns:
        The decoded Python object.

    Raises:
        ValueError: the decoder's own error, when the text cannot be
            repaired (offset unknown, offset past the end of the text, or
            the offending character is already a space).
        TypeError: if *json_message* is not text or bytes (e.g. ``None``).
    """
    # On Python 2 ``bytes`` is ``str``, so this branch only runs on Python 3.
    if isinstance(json_message, bytes) and not isinstance(json_message, str):
        # NOTE(review): assumes the body is UTF-8 encoded - confirm for the API.
        json_message = json_message.decode('utf-8')

    # Loop rather than recurse so a body with many bad characters cannot
    # exhaust the interpreter's recursion limit.
    while True:
        try:
            return json.loads(json_message)
        except ValueError as error:
            # Decoder errors subclass ValueError.  Newer decoders expose the
            # offset as ``pos``; older ones only embed "(char N)" in the text.
            position = getattr(error, 'pos', None)
            if position is None:
                _, marker, tail = str(error).rpartition('(char ')
                digits = tail.split(' ')[0].rstrip(')')
                if marker and digits.isdigit():
                    position = int(digits)
            can_patch = (
                position is not None
                and 0 <= position < len(json_message)
                and json_message[position] != ' '
            )
            if not can_patch:
                # No progress is possible; surface the decoder's error
                # instead of retrying forever.
                raise
            json_message = (
                json_message[:position] + ' ' + json_message[position + 1:]
            )
# Endpoint queried for each IMDb id.
base_url = 'http://www.omdbapi.com/'
# Parsed API responses keyed by title, split by the 'Type' the API reports.
tv_series = {}
films = {}

# Walk the id space; each id is 'tt' followed by a zero-padded 7-digit number
# ('tt0120690' is the id that was used to reproduce the decode error).
for i in range(1, 9999999):
    imdb_id = 'tt' + str(i).zfill(7)
    print(imdb_id)
    payload = {
        'i': imdb_id,
        'plot': 'full',
        'r': 'json',
    }
    response = requests.get(base_url, params=payload)
    if response.status_code != 200:
        continue
    # Decode the raw body with fix_JSON instead of response.json(), which
    # raises on the malformed escape sequences.
    result = fix_JSON(json_message=response.content)
    if result is None or result['Response'] == 'False':
        continue
    if result['Type'] == 'movie':
        films[result['Title']] = result
    elif result['Type'] == 'series':
        tv_series[result['Title']] = result
    else:
        # %-formatting prints the same text on Python 2 and Python 3.
        print('[ERROR] Type: %s' % result['Type'])

# NOTE(review): the pasted source lost its indentation; the dumps are assumed
# to run once, after the crawl loop - confirm against the original script.
with open('tv_series.json', 'w') as tv_series_outfile:
    json.dump(tv_series, tv_series_outfile)
with open('films.json', 'w') as films_outfile:
    json.dump(films, films_outfile)
我在向 REST API 发出 HTTP 请求时遇到了问题:返回的 JSON 响应格式不正确。JSON 中的其他转义序列都能被正确解析,但其中有一个句子包含 "inner\spiritual",导致 JSON 解码器把 "\s" 当成了转义序列,而它实际上并不是合法的转义序列。
我在 Stack Overflow 上搜索了相关帖子,没有找到完全适合我这种情况的解决方案,于是想出了一个不错的 hack:利用异常信息中给出的字符索引,在 JSON 字符串中做一次简单替换。我觉得值得分享,因为它可能会帮到处于类似情况的人。Happy hacking :)
完整代码:
import requests
import os
import json
# Endpoint queried for each IMDb id.
base_url = 'http://www.omdbapi.com/'
# Parsed API responses keyed by title, split by the 'Type' the API reports.
tv_series = {}
films = {}

# Reproduction case: a single id whose response triggers the decode error.
# (The full crawl iterated i over range(1, 9999999) and built the id as
# 'tt' + str(i).zfill(7).)
imdb_id = 'tt0120690'
print(imdb_id)

payload = {
    'i': imdb_id,
    'plot': 'full',
    'r': 'json',
}
response = requests.get(base_url, params=payload)
if response.status_code == 200:
    # This is the call that raises JSONDecodeError on the malformed body.
    result = response.json()
    if result is not None and result['Response'] != 'False':
        if result['Type'] == 'movie':
            films[result['Title']] = result
        elif result['Type'] == 'series':
            tv_series[result['Title']] = result
        else:
            # %-formatting prints the same text on Python 2 and Python 3.
            print('[ERROR] Type: %s' % result['Type'])

# Persist whatever was collected, one JSON file per category.
with open('tv_series.json', 'w') as tv_series_outfile:
    json.dump(tv_series, tv_series_outfile)
with open('films.json', 'w') as films_outfile:
    json.dump(films, films_outfile)
错误:
Traceback (most recent call last):
File "import_imdb.py", line 41, in <module>
result = response.json()
File "C:\Python27\lib\site-packages\requests\models.py", line 797, in json
return json.loads(self.text, **kwargs)
File "C:\Python27\lib\site-packages\simplejson\__init__.py", line 516, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\site-packages\simplejson\decoder.py", line 370, in decode
obj, end = self.raw_decode(s)
File "C:\Python27\lib\site-packages\simplejson\decoder.py", line 400, in raw_decode
return self.scan_once(s, idx=_w(s, idx).end())
File "C:\Python27\lib\site-packages\simplejson\scanner.py", line 127, in scan_once
return _scan_once(string, idx)
File "C:\Python27\lib\site-packages\simplejson\scanner.py", line 93, in _scan_once
_scan_once, object_hook, object_pairs_hook, memo)
File "C:\Python27\lib\site-packages\simplejson\decoder.py", line 194, in JSONObject
value, end = scan_once(s, end)
File "C:\Python27\lib\site-packages\simplejson\scanner.py", line 90, in _scan_once
return parse_string(string, idx + 1, encoding, strict)
File "C:\Python27\lib\site-packages\simplejson\decoder.py", line 99, in py_scanstring
raise JSONDecodeError(msg, s, end)
simplejson.scanner.JSONDecodeError: Invalid \X escape sequence u's': line 1 column 468 (char 467)
我的修复:
def fix_JSON(json_message=None):
    """Parse *json_message*, blanking out characters the JSON decoder rejects.

    Each time decoding fails, the character at the offset reported by the
    decoder is replaced with a single space and decoding is retried.  This
    is a best-effort repair for bodies containing stray backslashes such as
    ``inner\\spiritual``; the replaced character is lost.

    Args:
        json_message: JSON text.  ``bytes`` (as returned by
            ``response.content`` on Python 3) are decoded first.

    Returns:
        The decoded Python object.

    Raises:
        ValueError: the decoder's own error, when the text cannot be
            repaired (offset unknown, offset past the end of the text, or
            the offending character is already a space).
        TypeError: if *json_message* is not text or bytes (e.g. ``None``).
    """
    # On Python 2 ``bytes`` is ``str``, so this branch only runs on Python 3.
    if isinstance(json_message, bytes) and not isinstance(json_message, str):
        # NOTE(review): assumes the body is UTF-8 encoded - confirm for the API.
        json_message = json_message.decode('utf-8')

    # Loop rather than recurse so a body with many bad characters cannot
    # exhaust the interpreter's recursion limit.
    while True:
        try:
            return json.loads(json_message)
        except ValueError as error:
            # Decoder errors subclass ValueError.  Newer decoders expose the
            # offset as ``pos``; older ones only embed "(char N)" in the text.
            position = getattr(error, 'pos', None)
            if position is None:
                _, marker, tail = str(error).rpartition('(char ')
                digits = tail.split(' ')[0].rstrip(')')
                if marker and digits.isdigit():
                    position = int(digits)
            can_patch = (
                position is not None
                and 0 <= position < len(json_message)
                and json_message[position] != ' '
            )
            if not can_patch:
                # No progress is possible; surface the decoder's error
                # instead of retrying forever.
                raise
            json_message = (
                json_message[:position] + ' ' + json_message[position + 1:]
            )
完整代码:
import requests
import os
import json
def fix_JSON(json_message=None):
    """Parse *json_message*, blanking out characters the JSON decoder rejects.

    Each time decoding fails, the character at the offset reported by the
    decoder is replaced with a single space and decoding is retried.  This
    is a best-effort repair for bodies containing stray backslashes such as
    ``inner\\spiritual``; the replaced character is lost.

    Args:
        json_message: JSON text.  ``bytes`` (as returned by
            ``response.content`` on Python 3) are decoded first.

    Returns:
        The decoded Python object.

    Raises:
        ValueError: the decoder's own error, when the text cannot be
            repaired (offset unknown, offset past the end of the text, or
            the offending character is already a space).
        TypeError: if *json_message* is not text or bytes (e.g. ``None``).
    """
    # On Python 2 ``bytes`` is ``str``, so this branch only runs on Python 3.
    if isinstance(json_message, bytes) and not isinstance(json_message, str):
        # NOTE(review): assumes the body is UTF-8 encoded - confirm for the API.
        json_message = json_message.decode('utf-8')

    # Loop rather than recurse so a body with many bad characters cannot
    # exhaust the interpreter's recursion limit.
    while True:
        try:
            return json.loads(json_message)
        except ValueError as error:
            # Decoder errors subclass ValueError.  Newer decoders expose the
            # offset as ``pos``; older ones only embed "(char N)" in the text.
            position = getattr(error, 'pos', None)
            if position is None:
                _, marker, tail = str(error).rpartition('(char ')
                digits = tail.split(' ')[0].rstrip(')')
                if marker and digits.isdigit():
                    position = int(digits)
            can_patch = (
                position is not None
                and 0 <= position < len(json_message)
                and json_message[position] != ' '
            )
            if not can_patch:
                # No progress is possible; surface the decoder's error
                # instead of retrying forever.
                raise
            json_message = (
                json_message[:position] + ' ' + json_message[position + 1:]
            )
# Endpoint queried for each IMDb id.
base_url = 'http://www.omdbapi.com/'
# Parsed API responses keyed by title, split by the 'Type' the API reports.
tv_series = {}
films = {}

# Walk the id space; each id is 'tt' followed by a zero-padded 7-digit number
# ('tt0120690' is the id that was used to reproduce the decode error).
for i in range(1, 9999999):
    imdb_id = 'tt' + str(i).zfill(7)
    print(imdb_id)
    payload = {
        'i': imdb_id,
        'plot': 'full',
        'r': 'json',
    }
    response = requests.get(base_url, params=payload)
    if response.status_code != 200:
        continue
    # Decode the raw body with fix_JSON instead of response.json(), which
    # raises on the malformed escape sequences.
    result = fix_JSON(json_message=response.content)
    if result is None or result['Response'] == 'False':
        continue
    if result['Type'] == 'movie':
        films[result['Title']] = result
    elif result['Type'] == 'series':
        tv_series[result['Title']] = result
    else:
        # %-formatting prints the same text on Python 2 and Python 3.
        print('[ERROR] Type: %s' % result['Type'])

# NOTE(review): the pasted source lost its indentation; the dumps are assumed
# to run once, after the crawl loop - confirm against the original script.
with open('tv_series.json', 'w') as tv_series_outfile:
    json.dump(tv_series, tv_series_outfile)
with open('films.json', 'w') as films_outfile:
    json.dump(films, films_outfile)