Logstash:展平嵌套 JSON,组合数组内的字段
Logstash: Flatten nested JSON, combine fields inside array
我有一个 JSON 看起来像这样:
{
"foo": {
"bar": {
"type": "someType",
"id": "ga241ghs"
},
"tags": [
{
"@tagId": "123",
"tagAttributes": {
"attr1": "AAA",
"attr2": "111"
}
},
{
"@tagId": "456",
"tagAttributes": {
"attr1": "BBB",
"attr2": "222"
}
}
]
},
"text": "My text"
}
实际上它并没有分成多行(只是为了更好地概述),所以它看起来像这样:
{"foo":{"bar":{"type":"someType","id":"ga241ghs"},"tags":[{"@tagId":"123","tagAttributes":{"attr1":404,"attr2":416}},{"@tagId":"456","tagAttributes":{"attr1":1096,"attr2":1103}}]},"text":"My text"}
我想用 Logstash 将这个 JSON 插入到 Elasticsearch 索引中。但是,我想插入一个扁平化的 JSON,数组中的字段组合如下:
"foo.bar.tags.tagId": ["123", "456"]
"foo.tags.tagAttributs.attr1": ["AAA", "BBB"]
"foo.tags.tagAttributs.attr2": ["111", "222"]
总的来说,插入到 Elasticsearch 的数据应该是这样的:
"foo.bar.type": "someType"
"foo.bar.id": "ga241ghs"
"foo.tags.tagId": ["123", "456"]
"foo.tags.tagAttributs.attr1": ["AAA", "BBB"]
"foo.tags.tagAttributs.attr2": ["111", "222"]
"foo.text": "My text"
这是我当前的 Logstash .conf;我能够拆分 "tags" 数组,但现在我得到了 2 个条目。
现在如何将所有 tagId 连接到一个字段,将数组的 attr1 值连接到一个字段,并将所有 attr2 值连接到另一个字段?
input {
file {
codec => json
path => ["/path/to/my/data/*.json"]
mode => "read"
file_completed_action => "log"
file_completed_log_path => ["/path/to/my/logfile"]
sincedb_path => "/dev/null"
}
}
filter {
split {
field => "[foo][tags]"
}
}
output {
stdout { codec => rubydebug }
}
非常感谢!
我的 JSON 迭代器 IIFE 的好例子 - 不需要复杂的算法,只需选择 DepthFirst,稍微修改的路径(新 "raw" 版本)就是这样。
如果您喜欢这个 JS 答案,请注意在投票按钮下勾选接受标志。
如果您想要不同的语言,也可以在相同的 GitHub.
上使用具有相似迭代器的 C# 解析器
var src = {"foo":{"bar":{"type":"someType","id":"ga241ghs"},"tags":[{"@tagId":"123","tagAttributes":{"attr1":"AAA","attr2":"111"}},{"@tagId":"456","tagAttributes":{"attr1":"BBB","attr2":"222"}}],"text":"My text"}};
//console.log(JSON.stringify(src, null, 2));
function traverse(it) {
var dest = {};
var i=0;
do {
if (it.Current().HasStringValue()) {
var pathKey = it.Path(true).join('.');
var check = dest[pathKey];
if (check) {
if (!(check instanceof Array)) dest[pathKey] = [check];
dest[pathKey].push(it.Value());
} else {
dest[pathKey] = it.Value();
}
}
//console.log(it.Level + '\t' + it.Path(1).join('.') + '\t' + it.KeyDots(), (it.Value() instanceof Object) ? "-" : it.Value());
} while (it.DepthFirst());
console.log(JSON.stringify(dest, null, 2));
return dest;
}
/*
* https://github.com/eltomjan/ETEhomeTools/blob/master/HTM_HTA/JSON_Iterator_IIFE.js
* +new raw Path feature
*/
'use strict';
var JNode = (function (jsNode) {
function JNode(_parent, _pred, _key, _value) {
this.parent = _parent;
this.pred = _pred;
this.node = null;
this.next = null;
this.key = _key;
this.value = _value;
}
JNode.prototype.HasOwnKey = function () { return this.key && (typeof this.key != "number"); }
JNode.prototype.HasStringValue = function () { return !(this.value instanceof Object); }
return JNode;
})();
var JIterator = (function (json) {
var root, current, maxLevel = -1;
function JIterator(json, parent) {
if (parent === undefined) parent = null;
var pred = null, localCurrent;
for (var child in json) {
var obj = json[child] instanceof Object;
if (json instanceof Array) child = parseInt(child); // non-associative array
if (!root) root = localCurrent = new JNode(parent, null, child, json[child]);
else {
localCurrent = new JNode(parent, pred, child, obj ? ((json[child] instanceof Array) ? [] : {}) : json[child]);
}
if (pred) pred.next = localCurrent;
if (parent && parent.node == null) parent.node = localCurrent;
pred = localCurrent;
if (obj) {
var memPred = pred;
JIterator(json[child], pred);
pred = memPred;
}
}
if (this) {
current = root;
this.Level = 0;
}
}
JIterator.prototype.Current = function () { return current; }
JIterator.prototype.SetCurrent = function (newCurrent) {
current = newCurrent;
this.Level = 0;
while(newCurrent = newCurrent.parent) this.Level++;
}
JIterator.prototype.Parent = function () {
var retVal = current.parent;
if (retVal == null) return false;
this.Level--;
return current = retVal;
}
JIterator.prototype.Pred = function () {
var retVal = current.pred;
if (retVal == null) return false;
return current = retVal;
}
JIterator.prototype.Node = function () {
var retVal = current.node;
if (retVal == null) return false;
this.Level++;
return current = retVal;
}
JIterator.prototype.Next = function () {
var retVal = current.next;
if (retVal == null) return false;
return current = retVal;
}
JIterator.prototype.Key = function () { return current.key; }
JIterator.prototype.KeyDots = function () { return (typeof (current.key) == "number") ? "" : (current.key + ':'); }
JIterator.prototype.Value = function () { return current.value; }
JIterator.prototype.Reset = function () {
current = root;
this.Level = 0;
}
JIterator.prototype.RawPath = function () {
var steps = [], level = current;
do {
if (level != null && level.value instanceof Object) {
steps.push(level.key + (level.value instanceof Array ? "[]" : "{}"));
} else {
if (level != null) steps.push(level.key);
else break;
}
level = level.parent;
} while (level != null);
var retVal = "";
retVal = steps.reverse();
return retVal;
}
JIterator.prototype.Path = function (raw) {
var steps = [], level = current;
do {
if (level != null && level.value instanceof Object) {
var size = 0;
var items = level.node;
if (typeof (level.key) == "number" && !raw) steps.push('[' + level.key + ']');
else {
if(raw) {
if (typeof (level.key) != "number") steps.push(level.key);
} else {
while (items) {
size++;
items = items.next;
}
var type = (level.value instanceof Array ? "[]" : "{}");
var prev = steps[steps.length - 1];
if (prev && prev[0] == '[') {
var last = prev.length - 1;
if (prev[last] == ']') {
last--;
if (!isNaN(prev.substr(1, last))) {
steps.pop();
size += '.' + prev.substr(1, last);
}
}
}
steps.push(level.key + type[0] + size + type[1]);
}
}
} else {
if (level != null) {
if (typeof (level.key) == "number") steps.push('[' + level.key + ']');
else steps.push(level.key);
}
else break;
}
level = level.parent;
} while (level != null);
var retVal = "";
retVal = steps.reverse();
return retVal;
}
JIterator.prototype.DepthFirst = function () {
if (current == null) return 0; // exit sign
if (current.node != null) {
current = current.node;
this.Level++;
if (maxLevel < this.Level) maxLevel = this.Level;
return 1; // moved down
} else if (current.next != null) {
current = current.next;
return 2; // moved right
} else {
while (current != null) {
if (current.next != null) {
current = current.next;
return 3; // returned up & moved next
}
this.Level--;
current = current.parent;
}
}
return 0; // exit sign
}
JIterator.prototype.BreadthFirst = function () {
if (current == null) return 0; // exit sign
if (current.next) {
current = current.next;
return 1; // moved right
} else if (current.parent) {
var level = this.Level, point = current;
while (this.DepthFirst() && level != this.Level);
if (current) return 2; // returned up & moved next
do {
this.Reset();
level++;
while (this.DepthFirst() && level != this.Level);
if (current) return 3; // returned up & moved next
} while (maxLevel >= level);
return current != null ? 3 : 0;
} else if (current.node) {
current = current.node;
return 3;
} else if (current.pred) {
while (current.pred) current = current.pred;
while (current && !current.node) current = current.next;
if (!current) return null;
else return this.DepthFirst();
}
}
JIterator.prototype.ReadArray = function () {
var retVal = {};
var item = current;
do {
if (item.value instanceof Object) {
if (item.value.length == 0) retVal[item.key] = item.node;
else retVal[item.key] = item;
} else retVal[item.key] = item.value;
item = item.next;
} while (item != null);
return retVal;
}
JIterator.prototype.FindKey = function (key) {
var pos = current;
while (current && current.key != key) this.DepthFirst();
if (current.key == key) {
var retVal = current;
current = pos;
return retVal;
} else {
current = pos;
return null;
}
}
return JIterator;
})();
traverse(new JIterator(src));
您的简短 JSON 版本不同,现在使用这个版本,看起来像您需要的结果(attrs 已更改并且 text 从 foo 下的根移动):
{
"foo": {
"bar": {
"type": "someType",
"id": "ga241ghs"
},
"tags": [
{
"@tagId": "123",
"tagAttributes": {
"attr1": "AAA",
"attr2": "111"
}
},
{
"@tagId": "456",
"tagAttributes": {
"attr1": "BBB",
"attr2": "222"
}
}
],
"text": "My text"
}
}
弄清楚了如何直接在 Logstash 中使用 Ruby 过滤器来完成此操作 - 对于将来所有搜索此内容的人来说,这里是一个关于如何为 @tagId 执行此操作的示例:
filter {
ruby { code => '
i = 0
tagId_array = Array.new
while i < event.get( "[foo][tags]" ).length do
tagId_array = tagId_array.push(event.get( "[foo][tags][" + i.to_s + "][@tagId]" ))
i += 1
end
event.set( "foo.tags.tagId", tagId_array )
'
}
}
我有一个 JSON 看起来像这样:
{
"foo": {
"bar": {
"type": "someType",
"id": "ga241ghs"
},
"tags": [
{
"@tagId": "123",
"tagAttributes": {
"attr1": "AAA",
"attr2": "111"
}
},
{
"@tagId": "456",
"tagAttributes": {
"attr1": "BBB",
"attr2": "222"
}
}
]
},
"text": "My text"
}
实际上它并没有分成多行(只是为了更好地概述),所以它看起来像这样:
{"foo":{"bar":{"type":"someType","id":"ga241ghs"},"tags":[{"@tagId":"123","tagAttributes":{"attr1":404,"attr2":416}},{"@tagId":"456","tagAttributes":{"attr1":1096,"attr2":1103}}]},"text":"My text"}
我想用 Logstash 将这个 JSON 插入到 Elasticsearch 索引中。但是,我想插入一个扁平化的 JSON,数组中的字段组合如下:
"foo.bar.tags.tagId": ["123", "456"]
"foo.tags.tagAttributs.attr1": ["AAA", "BBB"]
"foo.tags.tagAttributs.attr2": ["111", "222"]
总的来说,插入到 Elasticsearch 的数据应该是这样的:
"foo.bar.type": "someType"
"foo.bar.id": "ga241ghs"
"foo.tags.tagId": ["123", "456"]
"foo.tags.tagAttributs.attr1": ["AAA", "BBB"]
"foo.tags.tagAttributs.attr2": ["111", "222"]
"foo.text": "My text"
这是我当前的 Logstash .conf;我能够拆分 "tags" 数组,但现在我得到了 2 个条目。
现在如何将所有 tagId 连接到一个字段,将数组的 attr1 值连接到一个字段,并将所有 attr2 值连接到另一个字段?
input {
file {
codec => json
path => ["/path/to/my/data/*.json"]
mode => "read"
file_completed_action => "log"
file_completed_log_path => ["/path/to/my/logfile"]
sincedb_path => "/dev/null"
}
}
filter {
split {
field => "[foo][tags]"
}
}
output {
stdout { codec => rubydebug }
}
非常感谢!
我的 JSON 迭代器 IIFE 的好例子 - 不需要复杂的算法,只需选择 DepthFirst,稍微修改的路径(新 "raw" 版本)就是这样。 如果您喜欢这个 JS 答案,请注意在投票按钮下勾选接受标志。
如果您想要不同的语言,也可以在相同的 GitHub.
上使用具有相似迭代器的 C# 解析器var src = {"foo":{"bar":{"type":"someType","id":"ga241ghs"},"tags":[{"@tagId":"123","tagAttributes":{"attr1":"AAA","attr2":"111"}},{"@tagId":"456","tagAttributes":{"attr1":"BBB","attr2":"222"}}],"text":"My text"}};
//console.log(JSON.stringify(src, null, 2));
function traverse(it) {
var dest = {};
var i=0;
do {
if (it.Current().HasStringValue()) {
var pathKey = it.Path(true).join('.');
var check = dest[pathKey];
if (check) {
if (!(check instanceof Array)) dest[pathKey] = [check];
dest[pathKey].push(it.Value());
} else {
dest[pathKey] = it.Value();
}
}
//console.log(it.Level + '\t' + it.Path(1).join('.') + '\t' + it.KeyDots(), (it.Value() instanceof Object) ? "-" : it.Value());
} while (it.DepthFirst());
console.log(JSON.stringify(dest, null, 2));
return dest;
}
/*
* https://github.com/eltomjan/ETEhomeTools/blob/master/HTM_HTA/JSON_Iterator_IIFE.js
* +new raw Path feature
*/
'use strict';
var JNode = (function (jsNode) {
function JNode(_parent, _pred, _key, _value) {
this.parent = _parent;
this.pred = _pred;
this.node = null;
this.next = null;
this.key = _key;
this.value = _value;
}
JNode.prototype.HasOwnKey = function () { return this.key && (typeof this.key != "number"); }
JNode.prototype.HasStringValue = function () { return !(this.value instanceof Object); }
return JNode;
})();
var JIterator = (function (json) {
var root, current, maxLevel = -1;
function JIterator(json, parent) {
if (parent === undefined) parent = null;
var pred = null, localCurrent;
for (var child in json) {
var obj = json[child] instanceof Object;
if (json instanceof Array) child = parseInt(child); // non-associative array
if (!root) root = localCurrent = new JNode(parent, null, child, json[child]);
else {
localCurrent = new JNode(parent, pred, child, obj ? ((json[child] instanceof Array) ? [] : {}) : json[child]);
}
if (pred) pred.next = localCurrent;
if (parent && parent.node == null) parent.node = localCurrent;
pred = localCurrent;
if (obj) {
var memPred = pred;
JIterator(json[child], pred);
pred = memPred;
}
}
if (this) {
current = root;
this.Level = 0;
}
}
JIterator.prototype.Current = function () { return current; }
JIterator.prototype.SetCurrent = function (newCurrent) {
current = newCurrent;
this.Level = 0;
while(newCurrent = newCurrent.parent) this.Level++;
}
JIterator.prototype.Parent = function () {
var retVal = current.parent;
if (retVal == null) return false;
this.Level--;
return current = retVal;
}
JIterator.prototype.Pred = function () {
var retVal = current.pred;
if (retVal == null) return false;
return current = retVal;
}
JIterator.prototype.Node = function () {
var retVal = current.node;
if (retVal == null) return false;
this.Level++;
return current = retVal;
}
JIterator.prototype.Next = function () {
var retVal = current.next;
if (retVal == null) return false;
return current = retVal;
}
JIterator.prototype.Key = function () { return current.key; }
JIterator.prototype.KeyDots = function () { return (typeof (current.key) == "number") ? "" : (current.key + ':'); }
JIterator.prototype.Value = function () { return current.value; }
JIterator.prototype.Reset = function () {
current = root;
this.Level = 0;
}
JIterator.prototype.RawPath = function () {
var steps = [], level = current;
do {
if (level != null && level.value instanceof Object) {
steps.push(level.key + (level.value instanceof Array ? "[]" : "{}"));
} else {
if (level != null) steps.push(level.key);
else break;
}
level = level.parent;
} while (level != null);
var retVal = "";
retVal = steps.reverse();
return retVal;
}
JIterator.prototype.Path = function (raw) {
var steps = [], level = current;
do {
if (level != null && level.value instanceof Object) {
var size = 0;
var items = level.node;
if (typeof (level.key) == "number" && !raw) steps.push('[' + level.key + ']');
else {
if(raw) {
if (typeof (level.key) != "number") steps.push(level.key);
} else {
while (items) {
size++;
items = items.next;
}
var type = (level.value instanceof Array ? "[]" : "{}");
var prev = steps[steps.length - 1];
if (prev && prev[0] == '[') {
var last = prev.length - 1;
if (prev[last] == ']') {
last--;
if (!isNaN(prev.substr(1, last))) {
steps.pop();
size += '.' + prev.substr(1, last);
}
}
}
steps.push(level.key + type[0] + size + type[1]);
}
}
} else {
if (level != null) {
if (typeof (level.key) == "number") steps.push('[' + level.key + ']');
else steps.push(level.key);
}
else break;
}
level = level.parent;
} while (level != null);
var retVal = "";
retVal = steps.reverse();
return retVal;
}
JIterator.prototype.DepthFirst = function () {
if (current == null) return 0; // exit sign
if (current.node != null) {
current = current.node;
this.Level++;
if (maxLevel < this.Level) maxLevel = this.Level;
return 1; // moved down
} else if (current.next != null) {
current = current.next;
return 2; // moved right
} else {
while (current != null) {
if (current.next != null) {
current = current.next;
return 3; // returned up & moved next
}
this.Level--;
current = current.parent;
}
}
return 0; // exit sign
}
JIterator.prototype.BreadthFirst = function () {
if (current == null) return 0; // exit sign
if (current.next) {
current = current.next;
return 1; // moved right
} else if (current.parent) {
var level = this.Level, point = current;
while (this.DepthFirst() && level != this.Level);
if (current) return 2; // returned up & moved next
do {
this.Reset();
level++;
while (this.DepthFirst() && level != this.Level);
if (current) return 3; // returned up & moved next
} while (maxLevel >= level);
return current != null ? 3 : 0;
} else if (current.node) {
current = current.node;
return 3;
} else if (current.pred) {
while (current.pred) current = current.pred;
while (current && !current.node) current = current.next;
if (!current) return null;
else return this.DepthFirst();
}
}
JIterator.prototype.ReadArray = function () {
var retVal = {};
var item = current;
do {
if (item.value instanceof Object) {
if (item.value.length == 0) retVal[item.key] = item.node;
else retVal[item.key] = item;
} else retVal[item.key] = item.value;
item = item.next;
} while (item != null);
return retVal;
}
JIterator.prototype.FindKey = function (key) {
var pos = current;
while (current && current.key != key) this.DepthFirst();
if (current.key == key) {
var retVal = current;
current = pos;
return retVal;
} else {
current = pos;
return null;
}
}
return JIterator;
})();
traverse(new JIterator(src));
您的简短 JSON 版本不同,现在使用这个版本,看起来像您需要的结果(attrs 已更改并且 text 从 foo 下的根移动):
{
"foo": {
"bar": {
"type": "someType",
"id": "ga241ghs"
},
"tags": [
{
"@tagId": "123",
"tagAttributes": {
"attr1": "AAA",
"attr2": "111"
}
},
{
"@tagId": "456",
"tagAttributes": {
"attr1": "BBB",
"attr2": "222"
}
}
],
"text": "My text"
}
}
弄清楚了如何直接在 Logstash 中使用 Ruby 过滤器来完成此操作 - 对于将来所有搜索此内容的人来说,这里是一个关于如何为 @tagId 执行此操作的示例:
filter {
ruby { code => '
i = 0
tagId_array = Array.new
while i < event.get( "[foo][tags]" ).length do
tagId_array = tagId_array.push(event.get( "[foo][tags][" + i.to_s + "][@tagId]" ))
i += 1
end
event.set( "foo.tags.tagId", tagId_array )
'
}
}