将 Java Map 的字符串(toString() 输出)转换为 Python 字典
java map string data to dictionary in python
我从数据源获取到的是 Java Map 的字符串形式(即 Map.toString() 的输出):
{0={_shards={total=1, failed=0, successful=1, skipped=0},
hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001, _type=_doc,
_source={input={type=log}, agent={hostname=ubuntu_fresh, name=ubuntu_fresh, id=879f36f2-4ade-47b6-a7b9-7972634c7b8c,
type=filebeat, ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63,
version=7.10.0}, @timestamp=2021-02-04T12:36:33.475Z,
ecs={version=1.6.0}, log={file={path=/var/log/auth.log},
offset=46607}, service={type=system}, host={hostname=ubuntu_fresh,
os={kernel=4.15.0-135-generic, codename=bionic, name=Ubuntu,
family=debian, version=18.04.1 LTS (Bionic Beaver), platform=ubuntu},
containerized=false, ip=[10.0.2.15, fe80::a00:27ff:fe82:f598,
192.168.56.22, fe80::a00:27ff:fe32:fab0], name=ubuntu_fresh, id=cdfcdf6a39d44b98b2aa51700134f415, mac=[08:00:27:82:f5:98,
08:00:27:32:fa:b0], architecture=x86_64}, fileset={name=auth},
message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for
root from 192.168.56.1 port 35830 ssh2, error={message=Provided Grok
expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh
sshd[2662]: Failed password for root from 192.168.56.1 port 35830
ssh2]}, event={ingested=2021-02-04T12:36:39.482598548Z,
timezone=+00:00, module=system, dataset=system.auth}},
_id=nNALbXcBbfKg8Fh6Zci7, _score=25.188179}], total={value=1, relation=eq}, max_score=25.188179}, took=1, timed_out=false}}
我没有权限在 java 中转换它。
我有一个 python 应用程序,我想以 python 字典的形式访问该数据。
所以想把它转换成python字典。
Java 集合(Map、List 等)的 .toString() 输出是有损的:它不会对分隔符做转义,键或值中一旦出现分隔符就会产生歧义。因此,无法从 Map.toString() 的输出中 100% 可靠地重建原始数据结构。不过,如果对输入加上以下约束:
- 键和值不包含某些特殊字符(大致为 {}=[]," 这几个)
- 数组中不会混合存放原始值与对象/数组
那么就可以较为可靠地把 toString() 的输出先转换为 JSON,再将 JSON 解析为 Python 数据结构。我不会在生产环境中使用这段代码;但只要您清楚它在某些输入上会出错,它在某些场景下仍可能有用:
# Sample input: the Java Map.toString() text from the question, passed to
# javastr_to_jsonstr() further down in this snippet.
TEST_VALUE = "{0={_shards={total=1, failed=0, successful=1, skipped=0}, hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001, _type=_doc, _source={input={type=log}, agent={hostname=ubuntu_fresh, name=ubuntu_fresh, id=879f36f2-4ade-47b6-a7b9-7972634c7b8c, type=filebeat, ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63, version=7.10.0}, @timestamp=2021-02-04T12:36:33.475Z, ecs={version=1.6.0}, log={file={path=/var/log/auth.log}, offset=46607}, service={type=system}, host={hostname=ubuntu_fresh, os={kernel=4.15.0-135-generic, codename=bionic, name=Ubuntu, family=debian, version=18.04.1 LTS (Bionic Beaver), platform=ubuntu}, containerized=false, ip=[10.0.2.15, fe80::a00:27ff:fe82:f598, 192.168.56.22, fe80::a00:27ff:fe32:fab0], name=ubuntu_fresh, id=cdfcdf6a39d44b98b2aa51700134f415, mac=[08:00:27:82:f5:98, 08:00:27:32:fa:b0], architecture=x86_64}, fileset={name=auth}, message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2, error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]}, event={ingested=2021-02-04T12:36:39.482598548Z, timezone=+00:00, module=system, dataset=system.auth}}, _id=nNALbXcBbfKg8Fh6Zci7, _score=25.188179}], total={value=1, relation=eq}, max_score=25.188179}, took=1, timed_out=false}}"
def quote_value_array_values(match):
    """Wrap each element of a matched primitive-array body in double quotes.

    Intended as the replacement callback for ``re.sub``: the matched text is
    the content between the brackets of an array, e.g. ``10.0.2.15, fe80::a00``.

    Args:
        match: Regex match whose full text is a ``", "``-separated list.

    Returns:
        The elements, each double-quoted and re-joined with ``", "``.
    """
    return ", ".join(f'"{value}"' for value in match.group().split(", "))


def javastr_to_jsonstr(s):
    """Best-effort rewrite of Java ``Map.toString()`` text into JSON text.

    The conversion is purely regex-based and therefore fragile: it relies on
    keys and values not containing delimiter characters such as
    ``{ } = [ ] , "`` and on arrays not mixing primitive values with nested
    objects/arrays. Every scalar value is emitted as a JSON *string*
    (``total=1`` becomes ``"total":"1"``).

    Args:
        s: Text in the ``{key=value, key={...}, key=[a, b]}`` form.

    Returns:
        The rewritten text, meant to be fed to ``json.loads``.
    """
    import re

    # Quote the elements of arrays that contain only primitive values
    # (the character class stops at any '{' or '[' so nested arrays are skipped).
    s = re.sub(r"(?<==\[)[^{\[\]]+(?=\])", quote_value_array_values, s)
    # key=value pairs whose value is a scalar: quote both sides. The pair is
    # located by what precedes it -- an opening brace or a ", " separator.
    s = re.sub(r'(?<={)([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)
    s = re.sub(r'(?<=, )([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)
    # Remaining keys (those followed by an object or array): quote the key
    # only and turn '=' into ':'.
    s = re.sub(r'(?<={)([^"=]+)=(?!")', r'"\1":', s)
    s = re.sub(r'(?<=, )([^"=]+)=(?!")', r'"\1":', s)
    return s
import json
# Convert the sample Java-style text to JSON text, parse it into Python
# objects, then pretty-print the parsed structure with a one-space indent.
json_str = javastr_to_jsonstr(TEST_VALUE)
json_obj = json.loads(json_str)
print(json.dumps(json_obj, indent=1))
输出:
{
"0": {
"_shards": {
"total": "1",
"failed": "0",
"successful": "1",
"skipped": "0"
},
"hits": {
"hits": [
{
"_index": "filebeat-7.10.0-2021.02.02-000001",
"_type": "_doc",
"_source": {
"input": {
"type": "log"
},
"agent": {
"hostname": "ubuntu_fresh",
"name": "ubuntu_fresh",
"id": "879f36f2-4ade-47b6-a7b9-7972634c7b8c",
"type": "filebeat",
"ephemeral_id": "5676523f-bc61-4c12-b319-8b463348ba63",
"version": "7.10.0"
},
"@timestamp": "2021-02-04T12:36:33.475Z",
"ecs": {
"version": "1.6.0"
},
"log": {
"file": {
"path": "/var/log/auth.log"
},
"offset": "46607"
},
"service": {
"type": "system"
},
"host": {
"hostname": "ubuntu_fresh",
"os": {
"kernel": "4.15.0-135-generic",
"codename": "bionic",
"name": "Ubuntu",
"family": "debian",
"version": "18.04.1 LTS (Bionic Beaver)",
"platform": "ubuntu"
},
"containerized": "false",
"ip": [
"10.0.2.15",
"fe80::a00:27ff:fe82:f598",
"192.168.56.22",
"fe80::a00:27ff:fe32:fab0"
],
"name": "ubuntu_fresh",
"id": "cdfcdf6a39d44b98b2aa51700134f415",
"mac": [
"08:00:27:82:f5:98",
"08:00:27:32:fa:b0"
],
"architecture": "x86_64"
},
"fileset": {
"name": "auth"
},
"message": "Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2",
"error": {
"message": "Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]"
},
"event": {
"ingested": "2021-02-04T12:36:39.482598548Z",
"timezone": "+00:00",
"module": "system",
"dataset": "system.auth"
}
},
"_id": "nNALbXcBbfKg8Fh6Zci7",
"_score": "25.188179"
}
],
"total": {
"value": "1",
"relation": "eq"
},
"max_score": "25.188179"
},
"took": "1",
"timed_out": "false"
}
}
我从数据源获取到的是 Java Map 的字符串形式(即 Map.toString() 的输出):
{0={_shards={total=1, failed=0, successful=1, skipped=0}, hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001, _type=_doc, _source={input={type=log}, agent={hostname=ubuntu_fresh, name=ubuntu_fresh, id=879f36f2-4ade-47b6-a7b9-7972634c7b8c, type=filebeat, ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63, version=7.10.0}, @timestamp=2021-02-04T12:36:33.475Z, ecs={version=1.6.0}, log={file={path=/var/log/auth.log}, offset=46607}, service={type=system}, host={hostname=ubuntu_fresh, os={kernel=4.15.0-135-generic, codename=bionic, name=Ubuntu, family=debian, version=18.04.1 LTS (Bionic Beaver), platform=ubuntu}, containerized=false, ip=[10.0.2.15, fe80::a00:27ff:fe82:f598, 192.168.56.22, fe80::a00:27ff:fe32:fab0], name=ubuntu_fresh, id=cdfcdf6a39d44b98b2aa51700134f415, mac=[08:00:27:82:f5:98, 08:00:27:32:fa:b0], architecture=x86_64}, fileset={name=auth}, message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2, error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]}, event={ingested=2021-02-04T12:36:39.482598548Z, timezone=+00:00, module=system, dataset=system.auth}}, _id=nNALbXcBbfKg8Fh6Zci7, _score=25.188179}], total={value=1, relation=eq}, max_score=25.188179}, took=1, timed_out=false}}
我没有权限在 java 中转换它。 我有一个 python 应用程序,我想以 python 字典的形式访问该数据。 所以想把它转换成python字典。
Java 集合(Map、List 等)的 .toString() 输出是有损的:它不会对分隔符做转义,键或值中一旦出现分隔符就会产生歧义。因此,无法从 Map.toString() 的输出中 100% 可靠地重建原始数据结构。不过,如果对输入加上以下约束:
- 键和值不包含某些特殊字符(大致为 {}=[]," 这几个)
- 数组中不会混合存放原始值与对象/数组
那么就可以较为可靠地把 toString() 的输出先转换为 JSON,再将 JSON 解析为 Python 数据结构。我不会在生产环境中使用这段代码;但只要您清楚它在某些输入上会出错,它在某些场景下仍可能有用:
# Sample input: the Java Map.toString() text from the question, passed to
# javastr_to_jsonstr() further down in this snippet.
TEST_VALUE = "{0={_shards={total=1, failed=0, successful=1, skipped=0}, hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001, _type=_doc, _source={input={type=log}, agent={hostname=ubuntu_fresh, name=ubuntu_fresh, id=879f36f2-4ade-47b6-a7b9-7972634c7b8c, type=filebeat, ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63, version=7.10.0}, @timestamp=2021-02-04T12:36:33.475Z, ecs={version=1.6.0}, log={file={path=/var/log/auth.log}, offset=46607}, service={type=system}, host={hostname=ubuntu_fresh, os={kernel=4.15.0-135-generic, codename=bionic, name=Ubuntu, family=debian, version=18.04.1 LTS (Bionic Beaver), platform=ubuntu}, containerized=false, ip=[10.0.2.15, fe80::a00:27ff:fe82:f598, 192.168.56.22, fe80::a00:27ff:fe32:fab0], name=ubuntu_fresh, id=cdfcdf6a39d44b98b2aa51700134f415, mac=[08:00:27:82:f5:98, 08:00:27:32:fa:b0], architecture=x86_64}, fileset={name=auth}, message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2, error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]}, event={ingested=2021-02-04T12:36:39.482598548Z, timezone=+00:00, module=system, dataset=system.auth}}, _id=nNALbXcBbfKg8Fh6Zci7, _score=25.188179}], total={value=1, relation=eq}, max_score=25.188179}, took=1, timed_out=false}}"
def quote_value_array_values(match):
    """Wrap each element of a matched primitive-array body in double quotes.

    Intended as the replacement callback for ``re.sub``: the matched text is
    the content between the brackets of an array, e.g. ``10.0.2.15, fe80::a00``.

    Args:
        match: Regex match whose full text is a ``", "``-separated list.

    Returns:
        The elements, each double-quoted and re-joined with ``", "``.
    """
    return ", ".join(f'"{value}"' for value in match.group().split(", "))


def javastr_to_jsonstr(s):
    """Best-effort rewrite of Java ``Map.toString()`` text into JSON text.

    The conversion is purely regex-based and therefore fragile: it relies on
    keys and values not containing delimiter characters such as
    ``{ } = [ ] , "`` and on arrays not mixing primitive values with nested
    objects/arrays. Every scalar value is emitted as a JSON *string*
    (``total=1`` becomes ``"total":"1"``).

    Args:
        s: Text in the ``{key=value, key={...}, key=[a, b]}`` form.

    Returns:
        The rewritten text, meant to be fed to ``json.loads``.
    """
    import re

    # Quote the elements of arrays that contain only primitive values
    # (the character class stops at any '{' or '[' so nested arrays are skipped).
    s = re.sub(r"(?<==\[)[^{\[\]]+(?=\])", quote_value_array_values, s)
    # key=value pairs whose value is a scalar: quote both sides. The pair is
    # located by what precedes it -- an opening brace or a ", " separator.
    s = re.sub(r'(?<={)([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)
    s = re.sub(r'(?<=, )([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)
    # Remaining keys (those followed by an object or array): quote the key
    # only and turn '=' into ':'.
    s = re.sub(r'(?<={)([^"=]+)=(?!")', r'"\1":', s)
    s = re.sub(r'(?<=, )([^"=]+)=(?!")', r'"\1":', s)
    return s
import json
# Convert the sample Java-style text to JSON text, parse it into Python
# objects, then pretty-print the parsed structure with a one-space indent.
json_str = javastr_to_jsonstr(TEST_VALUE)
json_obj = json.loads(json_str)
print(json.dumps(json_obj, indent=1))
输出:
{
"0": {
"_shards": {
"total": "1",
"failed": "0",
"successful": "1",
"skipped": "0"
},
"hits": {
"hits": [
{
"_index": "filebeat-7.10.0-2021.02.02-000001",
"_type": "_doc",
"_source": {
"input": {
"type": "log"
},
"agent": {
"hostname": "ubuntu_fresh",
"name": "ubuntu_fresh",
"id": "879f36f2-4ade-47b6-a7b9-7972634c7b8c",
"type": "filebeat",
"ephemeral_id": "5676523f-bc61-4c12-b319-8b463348ba63",
"version": "7.10.0"
},
"@timestamp": "2021-02-04T12:36:33.475Z",
"ecs": {
"version": "1.6.0"
},
"log": {
"file": {
"path": "/var/log/auth.log"
},
"offset": "46607"
},
"service": {
"type": "system"
},
"host": {
"hostname": "ubuntu_fresh",
"os": {
"kernel": "4.15.0-135-generic",
"codename": "bionic",
"name": "Ubuntu",
"family": "debian",
"version": "18.04.1 LTS (Bionic Beaver)",
"platform": "ubuntu"
},
"containerized": "false",
"ip": [
"10.0.2.15",
"fe80::a00:27ff:fe82:f598",
"192.168.56.22",
"fe80::a00:27ff:fe32:fab0"
],
"name": "ubuntu_fresh",
"id": "cdfcdf6a39d44b98b2aa51700134f415",
"mac": [
"08:00:27:82:f5:98",
"08:00:27:32:fa:b0"
],
"architecture": "x86_64"
},
"fileset": {
"name": "auth"
},
"message": "Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2",
"error": {
"message": "Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]"
},
"event": {
"ingested": "2021-02-04T12:36:39.482598548Z",
"timezone": "+00:00",
"module": "system",
"dataset": "system.auth"
}
},
"_id": "nNALbXcBbfKg8Fh6Zci7",
"_score": "25.188179"
}
],
"total": {
"value": "1",
"relation": "eq"
},
"max_score": "25.188179"
},
"took": "1",
"timed_out": "false"
}
}