需要帮助:如何用 Filebeat 解析 JSON 日志

Need help parsing JSON logs with Filebeat

我想将日志文件的每一行作为一个 JSON 文档发送到 Elasticsearch。
我有一个如下所示的日志文件:

{'client_id': 1, 'logger': 'instameister', 'event': '1', 'level': 'warning', 'date_created': '2022-02-23T11:35:16.397023'}
{'client_id': 1, 'logger': 'instameister', 'event': '2', 'level': 'error', 'date_created': '2022-02-23T11:35:16.397296'}
{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T11:39:58.357111'}
{'client_id': 1, 'logger': 'instameister', 'event': '1', 'level': 'warning', 'date_created': '2022-02-23T11:39:58.357738'}
{'client_id': 1, 'logger': 'instameister', 'event': '2', 'level': 'error', 'date_created': '2022-02-23T11:39:58.357904'}
{'client_id': 1, 'logger': 'instameister', 'event': '3', 'level': 'critical', 'date_created': '2022-02-23T11:39:58.358029'}
{'client_id': 1, 'logger': 'instameister_event', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358149'}
{'client_id': 1, 'logger': 'instameister_event', 'event': '1', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358363'}
{'client_id': 1, 'logger': 'instameister_event', 'event': '2', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358562'}
{'client_id': 1, 'logger': 'instameister_event', 'event': '3', 'level': 'info', 'date_created': '2022-02-23T11:39:58.358728'}
{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T11:41:00.466514'}
{'client_id': 1, 'logger': 'instameister', 'event': '1', 'level': 'warning', 'date_created': '2022-02-23T11:41:00.466931'}
{'client_id': 1, 'logger': 'instameister', 'event': '2', 'level': 'error', 'date_created': '2022-02-23T11:41:00.467042'}
{'client_id': 1, 'logger': 'instameister', 'event': '3', 'level': 'critical', 'date_created': '2022-02-23T11:41:00.467141'}

我的 filebeat 配置如下:

# Filebeat: read the application log file and forward its lines to Logstash.
filebeat.inputs:
  - type: log
    paths:
      - /home/philip/Devel/InstaMeister/instameister.log
    # JSON decoding options: put the decoded keys at the root of the event
    # and let them replace same-named fields that already exist.
    json.keys_under_root: true
    json.overwrite_keys: true

# Send events to the Logstash beats listener.
output.logstash:
  hosts:
    - "219.34.99.125:5044"

这是我的 logstash 管道配置:

# Receive events from Beats shippers on port 5044.
input {
  beats {
    port => 5044
  }
}

# Each event goes to both outputs below.
output {
  # Index events into the "instameister" index on the Elasticsearch host.
  elasticsearch {
    hosts => ["http://10.136.95.164:9200"]
    user => "elastic"
    # Empty in this listing -- presumably redacted for the post.
    password => ""
    index => "instameister"
    # Index-template management by this plugin is switched off.
    manage_template => false
  }
  # Also print every event to stdout with the json_lines codec.
  stdout { codec => json_lines }
}

似乎 filebeat 没有把这些键放到文档的根级别下,因为当我的日志发送到 Elasticsearch 时,文档看起来像这样:

{
  "_index": "instameister",
  "_id": "-6zaJn8BxuuGm2MUXt8x",
  "_version": 1,
  "_score": 1,
  "_source": {
    "message": "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}",
    "@timestamp": "2022-02-23T13:51:56.173Z",
    "json": {},
    "input": {
      "type": "log"
    },
    "host": {
      "name": "ThinkPad-T490"
    },
    "ecs": {
      "version": "8.0.0"
    },
    "tags": [
      "beats_input_codec_plain_applied"
    ],
    "event": {
      "original": "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    },
    "@version": "1",
    "agent": {
      "name": "ThinkPad-T490",
      "version": "8.0.0",
      "ephemeral_id": "7d63964b-eb3a-479c-8cb6-e2345e67dea9",
      "id": "001286a0-0ce8-4bf5-a9dc-41798923cae7",
      "type": "filebeat"
    },
    "log": {
      "file": {
        "path": "/home/philip/Devel/InstaMeister/instameister.log"
      },
      "offset": 21192
    }
  },
  "fields": {
    "agent.version.keyword": [
      "8.0.0"
    ],
    "input.type.keyword": [
      "log"
    ],
    "host.name.keyword": [
      "ThinkPad-T490"
    ],
    "tags.keyword": [
      "beats_input_codec_plain_applied"
    ],
    "agent.type": [
      "filebeat"
    ],
    "ecs.version.keyword": [
      "8.0.0"
    ],
    "@version": [
      "1"
    ],
    "agent.name": [
      "ThinkPad-T490"
    ],
    "host.name": [
      "ThinkPad-T490"
    ],
    "log.file.path.keyword": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "agent.type.keyword": [
      "filebeat"
    ],
    "agent.ephemeral_id.keyword": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "event.original": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "agent.name.keyword": [
      "ThinkPad-T490"
    ],
    "agent.id.keyword": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "input.type": [
      "log"
    ],
    "@version.keyword": [
      "1"
    ],
    "log.offset": [
      21192
    ],
    "message": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "tags": [
      "beats_input_codec_plain_applied"
    ],
    "@timestamp": [
      "2022-02-23T13:51:56.173Z"
    ],
    "agent.id": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "ecs.version": [
      "8.0.0"
    ],
    "log.file.path": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "message.keyword": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "event.original.keyword": [
      "{'client_id': 1, 'logger': 'instameister', 'event': 'test', 'level': 'info', 'date_created': '2022-02-23T14:51:54.733358'}"
    ],
    "agent.ephemeral_id": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "agent.version": [
      "8.0.0"
    ]
  }
}

我根本不关心 filebeat 的元数据。我只想让日志文件中的每一行成为一个单独的 Elasticsearch 文档。但是我很难找到实现这一目标的方法。有人可以帮我吗?

正如 @Val 所指出的,使用单引号的内容不是有效的 JSON。我更新了我的应用程序,让它向日志文件输出有效的 JSON(使用双引号),现在所有的键都已放到“根”文档下。

{
  "_index": "instameister",
  "_id": "HqwUJ38BxuuGm2MUgOAA",
  "_version": 1,
  "_score": 1,
  "_source": {
    "@timestamp": "2022-02-23T14:55:26.510Z",
    "input": {
      "type": "log"
    },
    "host": {
      "name": "ThinkPad-T490"
    },
    "ecs": {
      "version": "8.0.0"
    },
    "tags": [
      "beats_input_raw_event"
    ],
    "event": "1",
    "level": "warning",
    "client_id": 1,
    "@version": "1",
    "logger": "instameister",
    "agent": {
      "name": "ThinkPad-T490",
      "version": "8.0.0",
      "ephemeral_id": "7d63964b-eb3a-479c-8cb6-e2345e67dea9",
      "id": "001286a0-0ce8-4bf5-a9dc-41798923cae7",
      "type": "filebeat"
    },
    "log": {
      "file": {
        "path": "/home/philip/Devel/InstaMeister/instameister.log"
      },
      "offset": 1488
    },
    "date_created": "2022-02-23T15:55:17.234136"
  },
  "fields": {
    "agent.version.keyword": [
      "8.0.0"
    ],
    "input.type.keyword": [
      "log"
    ],
    "host.name.keyword": [
      "ThinkPad-T490"
    ],
    "logger": [
      "instameister"
    ],
    "tags.keyword": [
      "beats_input_raw_event"
    ],
    "client_id": [
      1
    ],
    "agent.type": [
      "filebeat"
    ],
    "ecs.version.keyword": [
      "8.0.0"
    ],
    "@version": [
      "1"
    ],
    "agent.name": [
      "ThinkPad-T490"
    ],
    "host.name": [
      "ThinkPad-T490"
    ],
    "event": [
      "1"
    ],
    "log.file.path.keyword": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "agent.type.keyword": [
      "filebeat"
    ],
    "event.keyword": [
      "1"
    ],
    "agent.ephemeral_id.keyword": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "agent.name.keyword": [
      "ThinkPad-T490"
    ],
    "level": [
      "warning"
    ],
    "date_created": [
      "2022-02-23T15:55:17.234Z"
    ],
    "agent.id.keyword": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "input.type": [
      "log"
    ],
    "@version.keyword": [
      "1"
    ],
    "log.offset": [
      1488
    ],
    "tags": [
      "beats_input_raw_event"
    ],
    "@timestamp": [
      "2022-02-23T14:55:26.510Z"
    ],
    "agent.id": [
      "001286a0-0ce8-4bf5-a9dc-41798923cae7"
    ],
    "level.keyword": [
      "warning"
    ],
    "ecs.version": [
      "8.0.0"
    ],
    "logger.keyword": [
      "instameister"
    ],
    "log.file.path": [
      "/home/philip/Devel/InstaMeister/instameister.log"
    ],
    "agent.ephemeral_id": [
      "7d63964b-eb3a-479c-8cb6-e2345e67dea9"
    ],
    "agent.version": [
      "8.0.0"
    ]
  }
}

我仍然想删除 filebeat 创建的多余的键。不过现在我的主要问题已经解决了。
编辑:
我通过添加这个去掉了额外的键:

# Strip the listed fields from each event.
processors:
  - drop_fields:
      fields:
        - "date_created"
        - "ecs.version"
        - "agent.version"
        - "agent.type"
        - "agent.id"
        - "agent.name"
        - "input.type"
        - "log.file.path"
        - "log.offset"
        - "agent.ephemeral_id"
        - "host.name"

(以上内容添加在我的 filebeat 配置中。)