将 Java Map 的字符串数据转换为 Python 中的字典

java map string data to dictionary in python

我正在从数据源获取 java 映射字符串。

{0={_shards={total=1, failed=0, successful=1, skipped=0}, hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001, _type=_doc, _source={input={type=log}, agent={hostname=ubuntu_fresh, name=ubuntu_fresh, id=879f36f2-4ade-47b6-a7b9-7972634c7b8c, type=filebeat, ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63, version=7.10.0}, @timestamp=2021-02-04T12:36:33.475Z, ecs={version=1.6.0}, log={file={path=/var/log/auth.log}, offset=46607}, service={type=system}, host={hostname=ubuntu_fresh, os={kernel=4.15.0-135-generic, codename=bionic, name=Ubuntu, family=debian, version=18.04.1 LTS (Bionic Beaver), platform=ubuntu}, containerized=false, ip=[10.0.2.15, fe80::a00:27ff:fe82:f598, 192.168.56.22, fe80::a00:27ff:fe32:fab0], name=ubuntu_fresh, id=cdfcdf6a39d44b98b2aa51700134f415, mac=[08:00:27:82:f5:98, 08:00:27:32:fa:b0], architecture=x86_64}, fileset={name=auth}, message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2, error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]}, event={ingested=2021-02-04T12:36:39.482598548Z, timezone=+00:00, module=system, dataset=system.auth}}, _id=nNALbXcBbfKg8Fh6Zci7, _score=25.188179}], total={value=1, relation=eq}, max_score=25.188179}, took=1, timed_out=false}}

我没有权限在 java 中转换它。 我有一个 python 应用程序,我想以 python 字典的形式访问该数据。 所以想把它转换成python字典。

Java 集合(Map、List 等)的 .toString() 是有损的,因为它不会消除分隔符的歧义。因此,无法从 Map.toString() 的输出中 100% 可靠地重建数据结构。但是,如果对问题加上一些约束:

  1. 键和值不包含某些字符(大致为 `{}=[],"`)
  2. 数组不混合包含原始值与对象/数组(objects/arrays)

那么我们就可以较为可靠地将 toString() 的输出转换为 JSON,再将 JSON 解析为 Python 数据结构。我不会在生产环境中使用这段代码,但只要您清楚它可能会出错,它在某些情况下仍然有用:

# Sample input: the text produced by toString() on a nested Java Map.
# It is passed to javastr_to_jsonstr() by the demo statements further down.
TEST_VALUE = "{0={_shards={total=1, failed=0, successful=1, skipped=0}, hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001, _type=_doc, _source={input={type=log}, agent={hostname=ubuntu_fresh, name=ubuntu_fresh, id=879f36f2-4ade-47b6-a7b9-7972634c7b8c, type=filebeat, ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63, version=7.10.0}, @timestamp=2021-02-04T12:36:33.475Z, ecs={version=1.6.0}, log={file={path=/var/log/auth.log}, offset=46607}, service={type=system}, host={hostname=ubuntu_fresh, os={kernel=4.15.0-135-generic, codename=bionic, name=Ubuntu, family=debian, version=18.04.1 LTS (Bionic Beaver), platform=ubuntu}, containerized=false, ip=[10.0.2.15, fe80::a00:27ff:fe82:f598, 192.168.56.22, fe80::a00:27ff:fe32:fab0], name=ubuntu_fresh, id=cdfcdf6a39d44b98b2aa51700134f415, mac=[08:00:27:82:f5:98, 08:00:27:32:fa:b0], architecture=x86_64}, fileset={name=auth}, message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2, error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]}, event={ingested=2021-02-04T12:36:39.482598548Z, timezone=+00:00, module=system, dataset=system.auth}}, _id=nNALbXcBbfKg8Fh6Zci7, _score=25.188179}], total={value=1, relation=eq}, max_score=25.188179}, took=1, timed_out=false}}"

def quote_value_array_values(match):
    """Wrap each element of a flat array body in double quotes.

    Intended as the replacement callback for ``re.sub`` in
    ``javastr_to_jsonstr``.

    Args:
        match: A regex match whose full text is the inside of an array of
            primitive values separated by ``", "``, e.g. ``a, b, c``.

    Returns:
        The elements, each surrounded by double quotes and re-joined with
        ``", "``, e.g. ``"a", "b", "c"``.
    """
    return ", ".join(f'"{value}"' for value in match.group().split(", "))


def javastr_to_jsonstr(s):
    """Best-effort conversion of Java ``Map.toString()`` text to JSON text.

    The conversion is regex based and therefore lossy: it only works when
    keys and values do not themselves contain the delimiter characters
    (roughly ``{}=[],"``) and arrays do not mix primitive values with
    objects/arrays. Every scalar value is emitted as a JSON *string*
    (``total=1`` becomes ``"total":"1"``).

    Args:
        s: Text in the ``{key=value, key={...}, key=[...]}`` form.

    Returns:
        The rewritten text, suitable for ``json.loads`` when the
        constraints above hold.
    """
    import re

    # 1. Arrays of primitives: quote each element of a ``=[...]`` body that
    #    contains no nested ``{``, ``[`` or ``]``.
    s = re.sub(r"(?<==\[)[^{\[\]]+(?=\])", quote_value_array_values, s)

    # 2./3. Scalar pairs: ``key=value`` where the value does not start an
    #    object or array, located right after ``{`` and then after ``, ``.
    #    The backreferences carry the captured key and value into the output.
    s = re.sub(r'(?<={)([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)
    s = re.sub(r'(?<=, )([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)

    # 4./5. Remaining ``key=`` prefixes (their value is an object or array):
    #    quote only the key and turn ``=`` into ``:``.
    s = re.sub(r'(?<={)([^"=]+)=(?!")', r'"\1":', s)
    s = re.sub(r'(?<=, )([^"=]+)=(?!")', r'"\1":', s)
    return s

import json
# Rewrite the sample Java map text as JSON text, then parse it into Python
# objects (nested dicts and lists).
json_str = javastr_to_jsonstr(TEST_VALUE)
json_obj = json.loads(json_str)
# Pretty-print the parsed structure with a one-space indent per level.
print(json.dumps(json_obj, indent=1))

输出:

{
 "0": {
  "_shards": {
   "total": "1",
   "failed": "0",
   "successful": "1",
   "skipped": "0"
  },
  "hits": {
   "hits": [
    {
     "_index": "filebeat-7.10.0-2021.02.02-000001",
     "_type": "_doc",
     "_source": {
      "input": {
       "type": "log"
      },
      "agent": {
       "hostname": "ubuntu_fresh",
       "name": "ubuntu_fresh",
       "id": "879f36f2-4ade-47b6-a7b9-7972634c7b8c",
       "type": "filebeat",
       "ephemeral_id": "5676523f-bc61-4c12-b319-8b463348ba63",
       "version": "7.10.0"
      },
      "@timestamp": "2021-02-04T12:36:33.475Z",
      "ecs": {
       "version": "1.6.0"
      },
      "log": {
       "file": {
        "path": "/var/log/auth.log"
       },
       "offset": "46607"
      },
      "service": {
       "type": "system"
      },
      "host": {
       "hostname": "ubuntu_fresh",
       "os": {
        "kernel": "4.15.0-135-generic",
        "codename": "bionic",
        "name": "Ubuntu",
        "family": "debian",
        "version": "18.04.1 LTS (Bionic Beaver)",
        "platform": "ubuntu"
       },
       "containerized": "false",
       "ip": [
        "10.0.2.15",
        "fe80::a00:27ff:fe82:f598",
        "192.168.56.22",
        "fe80::a00:27ff:fe32:fab0"
       ],
       "name": "ubuntu_fresh",
       "id": "cdfcdf6a39d44b98b2aa51700134f415",
       "mac": [
        "08:00:27:82:f5:98",
        "08:00:27:32:fa:b0"
       ],
       "architecture": "x86_64"
      },
      "fileset": {
       "name": "auth"
      },
      "message": "Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2",
      "error": {
       "message": "Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]"
      },
      "event": {
       "ingested": "2021-02-04T12:36:39.482598548Z",
       "timezone": "+00:00",
       "module": "system",
       "dataset": "system.auth"
      }
     },
     "_id": "nNALbXcBbfKg8Fh6Zci7",
     "_score": "25.188179"
    }
   ],
   "total": {
    "value": "1",
    "relation": "eq"
   },
   "max_score": "25.188179"
  },
  "took": "1",
  "timed_out": "false"
 }
}