使用 python 从 json 响应中删除 unicode 字符串和空格
Remove unicode string and spaces from json response using python
我向 REST API 提交(POST)数据,并收到 JSON 格式的响应。该响应中包含 unicode 转义序列(例如 \u000d\u000a)。
我尝试了很多方法从 json 数据中删除这些 unicode 字符,但都没有效果 :( 删除 unicode 字符之后,我还需要用 JSON Schema 对数据进行校验。
以下是我的代码片段:
import json
import logging
from jsonschema import validate
if __name__ == '__main__':
    # JSON Schema (draft-04) that the decoded response is expected to satisfy.
    schema = {
        "$schema": "http://json-schema.org/draft-04/schema#",
        "type": "object",
        "properties": {
            "SOURCE": {
                "type": "string"
            },
            "TIMESTAMP": {
                "type": "string"
            },
            "TAGERRORS": {
                "type": "array",
                "items": [
                    {
                        "type": "object",
                        "properties": {
                            "TAGNAME": {
                                "type": "string"
                            },
                            "ERROR": {
                                "type": "string"
                            }
                        },
                        "required": [
                            "TAGNAME",
                            "ERROR"
                        ]
                    }
                ]
            }
        },
        "required": [
            "SOURCE",
            "TIMESTAMP",
            "TAGERRORS"
        ]
    }

    # Raw string on purpose: the payload has to contain the backslash escapes
    # themselves (\u000d, \u000a, \") rather than real CR/LF characters and
    # bare quotes, otherwise it is not the JSON text the API returned.
    response_dict = r'"{\u000d\u000a \"SOURCE\": \"APPDEV\",\u000d\u000a \"TIMESTAMP\": \"2022-04-19 12:29:27\",\u000d\u000a \"TAGERRORS\": []\u000d\u000a}"'

    # NOTE: str.replace() matches its argument literally (it is not a regex),
    # and this non-raw literal is CR + LF followed by the characters "\s*".
    # That sequence never occurs in the payload, so the text is unchanged.
    response_dict = response_dict.replace("\u000d\u000a\s*", "")
    print(response_dict)

    # The payload is a JSON *string*, so this returns a str, not a dict.
    my_json = json.loads(response_dict)
    # validate(instance=my_json, schema=schema)
    # print(my_json)
response_dict = response_dict.replace("\u000d\u000a\s*", "")
这一行不起作用,输出的结果如下(字符串没有任何变化):
"{\u000d\u000a \"SOURCE\": \"APPDEV\",\u000d\u000a \"TIMESTAMP\": \"2022-04-19 12:29:27\",\u000d\u000a \"TAGERRORS\": []\u000d\u000a}"
此外,我尝试了以下正则表达式来删除 unicode 字符,但它在模式验证期间失败了。
import re
def removeunicode(text):
    r"""Remove literal ``\uXXXX`` escapes and any whitespace following them.

    The pattern targets the six-character escape as it appears in raw JSON
    text (a backslash, ``u``, four hex digits), not already-decoded
    characters. The backslash has to be escaped in the pattern (``\\u``);
    a pattern beginning ``\[u]`` only matches the literal text ``[u]``.

    Args:
        text: Raw text that may contain ``\uXXXX`` escape sequences.

    Returns:
        ``text`` with every escape and its trailing whitespace removed.

    Note:
        This is a purely textual edit: it discards whatever character the
        escape encoded and does not recognise an escaped backslash
        (``\\u0041``). When the input is valid JSON, decoding it with
        ``json.loads`` is the lossless alternative.
    """
    return re.sub(r'\\u[0-9a-fA-F]{4}\s*', "", text)
my_json = json.loads(removeunicode(response_dict))
能否请您帮助解决问题。谢谢。
该响应经过了双重 JSON 编码(double-JSON-encoded)。只需对响应调用两次 json.loads
即可修复;不过如果可以,最好在上游修复这个问题:
# From OP's example
>>> response_dict = r'"{\u000d\u000a \"SOURCE\": \"APPDEV\",\u000d\u000a \"TIMESTAMP\": \"2022-04-19 12:29:27\",\u000d\u000a \"TAGERRORS\": []\u000d\u000a}"'
>>> print(response_dict) # This is valid JSON
"{\u000d\u000a \"SOURCE\": \"APPDEV\",\u000d\u000a \"TIMESTAMP\": \"2022-04-19 12:29:27\",\u000d\u000a \"TAGERRORS\": []\u000d\u000a}"
>>> json.loads(response_dict)
'{\r\n "SOURCE": "APPDEV",\r\n "TIMESTAMP": "2022-04-19 12:29:27",\r\n "TAGERRORS": []\r\n}'
>>> json.loads(json.loads(response_dict))
{'SOURCE': 'APPDEV', 'TIMESTAMP': '2022-04-19 12:29:27', 'TAGERRORS': []}
我向 REST API 提交(POST)数据,并收到 JSON 格式的响应。该响应中包含 unicode 转义序列(例如 \u000d\u000a)。 我尝试了很多方法从 json 数据中删除这些 unicode 字符,但都没有效果 :( 删除 unicode 字符之后,我还需要用 JSON Schema 对数据进行校验。 以下是我的代码片段:
import json
import logging
from jsonschema import validate
if __name__ == '__main__':
    # JSON Schema (draft-04) that the decoded response is expected to satisfy.
    schema = {
        "$schema": "http://json-schema.org/draft-04/schema#",
        "type": "object",
        "properties": {
            "SOURCE": {
                "type": "string"
            },
            "TIMESTAMP": {
                "type": "string"
            },
            "TAGERRORS": {
                "type": "array",
                "items": [
                    {
                        "type": "object",
                        "properties": {
                            "TAGNAME": {
                                "type": "string"
                            },
                            "ERROR": {
                                "type": "string"
                            }
                        },
                        "required": [
                            "TAGNAME",
                            "ERROR"
                        ]
                    }
                ]
            }
        },
        "required": [
            "SOURCE",
            "TIMESTAMP",
            "TAGERRORS"
        ]
    }

    # Raw string on purpose: the payload has to contain the backslash escapes
    # themselves (\u000d, \u000a, \") rather than real CR/LF characters and
    # bare quotes, otherwise it is not the JSON text the API returned.
    response_dict = r'"{\u000d\u000a \"SOURCE\": \"APPDEV\",\u000d\u000a \"TIMESTAMP\": \"2022-04-19 12:29:27\",\u000d\u000a \"TAGERRORS\": []\u000d\u000a}"'

    # NOTE: str.replace() matches its argument literally (it is not a regex),
    # and this non-raw literal is CR + LF followed by the characters "\s*".
    # That sequence never occurs in the payload, so the text is unchanged.
    response_dict = response_dict.replace("\u000d\u000a\s*", "")
    print(response_dict)

    # The payload is a JSON *string*, so this returns a str, not a dict.
    my_json = json.loads(response_dict)
    # validate(instance=my_json, schema=schema)
    # print(my_json)
response_dict = response_dict.replace("\u000d\u000a\s*", "")
这一行不起作用,输出的结果如下(字符串没有任何变化):
"{\u000d\u000a \"SOURCE\": \"APPDEV\",\u000d\u000a \"TIMESTAMP\": \"2022-04-19 12:29:27\",\u000d\u000a \"TAGERRORS\": []\u000d\u000a}"
此外,我尝试了以下正则表达式来删除 unicode 字符,但它在模式验证期间失败了。
import re
def removeunicode(text):
    r"""Remove literal ``\uXXXX`` escapes and any whitespace following them.

    The pattern targets the six-character escape as it appears in raw JSON
    text (a backslash, ``u``, four hex digits), not already-decoded
    characters. The backslash has to be escaped in the pattern (``\\u``);
    a pattern beginning ``\[u]`` only matches the literal text ``[u]``.

    Args:
        text: Raw text that may contain ``\uXXXX`` escape sequences.

    Returns:
        ``text`` with every escape and its trailing whitespace removed.

    Note:
        This is a purely textual edit: it discards whatever character the
        escape encoded and does not recognise an escaped backslash
        (``\\u0041``). When the input is valid JSON, decoding it with
        ``json.loads`` is the lossless alternative.
    """
    return re.sub(r'\\u[0-9a-fA-F]{4}\s*', "", text)
my_json = json.loads(removeunicode(response_dict))
能否请您帮助解决问题。谢谢。
该响应经过了双重 JSON 编码(double-JSON-encoded)。只需对响应调用两次 json.loads
即可修复;不过如果可以,最好在上游修复这个问题:
# From OP's example
>>> response_dict = r'"{\u000d\u000a \"SOURCE\": \"APPDEV\",\u000d\u000a \"TIMESTAMP\": \"2022-04-19 12:29:27\",\u000d\u000a \"TAGERRORS\": []\u000d\u000a}"'
>>> print(response_dict) # This is valid JSON
"{\u000d\u000a \"SOURCE\": \"APPDEV\",\u000d\u000a \"TIMESTAMP\": \"2022-04-19 12:29:27\",\u000d\u000a \"TAGERRORS\": []\u000d\u000a}"
>>> json.loads(response_dict)
'{\r\n "SOURCE": "APPDEV",\r\n "TIMESTAMP": "2022-04-19 12:29:27",\r\n "TAGERRORS": []\r\n}'
>>> json.loads(json.loads(response_dict))
{'SOURCE': 'APPDEV', 'TIMESTAMP': '2022-04-19 12:29:27', 'TAGERRORS': []}