Logstash:展平嵌套 JSON,组合数组内的字段

Logstash: Flatten nested JSON, combine fields inside array

我有一个 JSON 看起来像这样:

{
  "foo": {
    "bar": {
      "type": "someType",
      "id": "ga241ghs"
    },
    "tags": [
      {
        "@tagId": "123",
        "tagAttributes": {
          "attr1": "AAA",
          "attr2": "111"
        }
      },
      {
        "@tagId": "456",
        "tagAttributes": {
          "attr1": "BBB",
          "attr2": "222"
        }
      }
    ]
  },
  "text": "My text"
}

实际上它并没有分成多行(只是为了更好地概述),所以它看起来像这样:

{"foo":{"bar":{"type":"someType","id":"ga241ghs"},"tags":[{"@tagId":"123","tagAttributes":{"attr1":404,"attr2":416}},{"@tagId":"456","tagAttributes":{"attr1":1096,"attr2":1103}}]},"text":"My text"}

我想用 Logstash 将这个 JSON 插入到 Elasticsearch 索引中。但是,我想插入一个扁平化的 JSON,数组中的字段组合如下:

"foo.tags.tagId": ["123", "456"]
"foo.tags.tagAttributs.attr1": ["AAA", "BBB"]
"foo.tags.tagAttributs.attr2": ["111", "222"]

总的来说,插入到 Elasticsearch 的数据应该是这样的:

"foo.bar.type": "someType"
"foo.bar.id": "ga241ghs"
"foo.tags.tagId": ["123", "456"]
"foo.tags.tagAttributs.attr1": ["AAA", "BBB"]
"foo.tags.tagAttributs.attr2": ["111", "222"]
"foo.text": "My text"

这是我当前的 Logstash .conf;我能够拆分 "tags" 数组,但现在我得到了 2 个条目。

现在如何将所有 tagId 连接到一个字段,将数组的 attr1 值连接到一个字段,并将所有 attr2 值连接到另一个字段?

# Pipeline from the question: reads JSON files, splits the [foo][tags] array
# and prints the resulting events for inspection.
input {
  file {
    # Decode the file content with the json codec.
    codec => json
    path => ["/path/to/my/data/*.json"]
    mode => "read"
    # Completed files are recorded in the log file configured below.
    file_completed_action => "log"
    file_completed_log_path => ["/path/to/my/logfile"]
    # NOTE(review): presumably points the sincedb at /dev/null so read
    # positions are not persisted between runs - confirm against the file
    # input plugin documentation.
    sincedb_path => "/dev/null"
  }
}

filter {
  # Split on the [foo][tags] array. According to the question text this yields
  # one entry per tag (2 entries for the sample document), which is the
  # behaviour the author wants to replace with combined array fields.
  split {
    field => "[foo][tags]"
  }
}

output {
  # Debug output only; nothing is sent to Elasticsearch in this snippet.
  stdout { codec => rubydebug }
}

非常感谢!

这是我的 JSON 迭代器 IIFE 的一个很好的用例 - 不需要复杂的算法,只需使用 DepthFirst 遍历,再配合稍作修改的 Path(新的 "raw" 模式)即可。 如果您喜欢这个 JS 答案,请点击投票按钮下方的接受标记。

如果您想要不同的语言,同一个 GitHub 仓库中也有使用相似迭代器的 C# 解析器。

// Sample document used by the answer. Unlike the multi-line JSON in the
// question, "text" is nested under "foo" here.
var src = {
    "foo": {
        "bar": { "type": "someType", "id": "ga241ghs" },
        "tags": [
            { "@tagId": "123", "tagAttributes": { "attr1": "AAA", "attr2": "111" } },
            { "@tagId": "456", "tagAttributes": { "attr1": "BBB", "attr2": "222" } }
        ],
        "text": "My text"
    }
};
//console.log(JSON.stringify(src, null, 2));
/**
 * Flattens the tree behind an iterator into a map of "dotted.path" -> value.
 *
 * The iterator is walked with DepthFirst(). For every node whose
 * HasStringValue() is true, the key is Path(true) joined with '.'. The first
 * value seen for a key is stored as-is; when the same key is seen again the
 * entry is turned into an array and further values are appended in visiting
 * order.
 *
 * The result is also printed with console.log as pretty-printed JSON.
 *
 * @param {object} it Iterator exposing Current(), Path(raw), Value() and
 *                    DepthFirst() (e.g. a JIterator positioned at its start).
 * @returns {object} The flattened map.
 */
function traverse(it) {
    var dest = {};
    var hasOwn = Object.prototype.hasOwnProperty;
    do {
        var node = it.Current();
        // An iterator over an empty document has no current node to inspect.
        if (node && node.HasStringValue()) {
            var pathKey = it.Path(true).join('.');
            var value = it.Value();
            // Test for key presence, not truthiness: a first value of 0, "",
            // false or null must still be combined with later values, and
            // inherited names such as "constructor" must not count as present.
            if (hasOwn.call(dest, pathKey)) {
                if (!Array.isArray(dest[pathKey])) dest[pathKey] = [dest[pathKey]];
                dest[pathKey].push(value);
            } else {
                dest[pathKey] = value;
            }
        }
    } while (it.DepthFirst());

    console.log(JSON.stringify(dest, null, 2));
    return dest;
}

/*
 * https://github.com/eltomjan/ETEhomeTools/blob/master/HTM_HTA/JSON_Iterator_IIFE.js
 * +new raw Path feature
 */
'use strict';
var JNode = (function () {

    /**
     * One element of the linked tree built from a JSON document.
     *
     * @param parentNode      Enclosing node (stored as `parent`).
     * @param previousSibling Preceding sibling (stored as `pred`).
     * @param nodeKey         Property name or array index (stored as `key`).
     * @param nodeValue       Value held by the node (stored as `value`).
     *
     * `node` (first child) and `next` (following sibling) start out as null
     * and are expected to be linked by whoever builds the tree.
     */
    function JNode(parentNode, previousSibling, nodeKey, nodeValue) {
        this.parent = parentNode;
        this.pred = previousSibling;
        this.node = null;
        this.next = null;
        this.key = nodeKey;
        this.value = nodeValue;
    }

    /**
     * Returns the key itself when it is falsy (e.g. 0 or ""), otherwise
     * whether the key is something other than a number.
     */
    JNode.prototype.HasOwnKey = function () {
        if (!this.key) return this.key;
        return typeof this.key != "number";
    };

    /** True when the value is not an Object instance (i.e. not a container). */
    JNode.prototype.HasStringValue = function () {
        var isContainer = this.value instanceof Object;
        return !isContainer;
    };

    return JNode;
}());

var JIterator = (function () {

    /**
     * Recursively mirrors `json` as a linked tree of JNode objects and records
     * the first node created as `owner._root`.
     *
     * Children of one container are chained through `pred`/`next`; the first
     * child is attached to its container through `node`. The root node keeps
     * the original value; every other container node stores an empty `[]` or
     * `{}` placeholder of the matching kind, leaves store their value.
     *
     * @param owner  The JIterator being initialised.
     * @param json   Object or array to mirror.
     * @param parent JNode that becomes the `parent` of the created nodes.
     */
    function buildNodes(owner, json, parent) {
        var pred = null, localCurrent;
        for (var child in json) {
            var obj = json[child] instanceof Object;
            if (json instanceof Array) child = parseInt(child, 10); // non-associative array
            if (!owner._root) owner._root = localCurrent = new JNode(parent, null, child, json[child]);
            else {
                localCurrent = new JNode(parent, pred, child, obj ? ((json[child] instanceof Array) ? [] : {}) : json[child]);
            }
            if (pred) pred.next = localCurrent;
            if (parent && parent.node == null) parent.node = localCurrent;
            pred = localCurrent;
            if (obj) buildNodes(owner, json[child], localCurrent);
        }
    }

    /**
     * Cursor over a JSON document that has been converted to a JNode tree.
     *
     * All traversal state (`_root`, `_current`, `_maxLevel`) lives on the
     * instance, so several iterators can exist and be advanced independently.
     * `Level` is the depth of the current node relative to the root (0).
     *
     * @param json   Object or array to iterate.
     * @param parent Optional JNode used as parent of the top-level nodes
     *               (defaults to null).
     */
    function JIterator(json, parent) {
        if (parent === undefined) parent = null;
        this._root = undefined;
        this._current = undefined;
        this._maxLevel = -1;
        buildNodes(this, json, parent);
        this._current = this._root;
        this.Level = 0;
    }

    /** Returns the current node (undefined for an empty document, null after the walk ended). */
    JIterator.prototype.Current = function () { return this._current; };

    /** Moves the cursor to `newCurrent` and recomputes Level by counting its ancestors. */
    JIterator.prototype.SetCurrent = function (newCurrent) {
        this._current = newCurrent;
        this.Level = 0;
        for (var up = newCurrent.parent; up; up = up.parent) this.Level++;
    };

    /** Moves to the parent node; returns it, or false (cursor unchanged) when there is none. */
    JIterator.prototype.Parent = function () {
        var retVal = this._current.parent;
        if (retVal == null) return false;
        this.Level--;
        return this._current = retVal;
    };

    /** Moves to the previous sibling; returns it, or false when there is none. */
    JIterator.prototype.Pred = function () {
        var retVal = this._current.pred;
        if (retVal == null) return false;
        return this._current = retVal;
    };

    /** Moves to the first child; returns it, or false when there is none. */
    JIterator.prototype.Node = function () {
        var retVal = this._current.node;
        if (retVal == null) return false;
        this.Level++;
        return this._current = retVal;
    };

    /** Moves to the next sibling; returns it, or false when there is none. */
    JIterator.prototype.Next = function () {
        var retVal = this._current.next;
        if (retVal == null) return false;
        return this._current = retVal;
    };

    /** Key of the current node. */
    JIterator.prototype.Key = function () { return this._current.key; };

    /** "" for numeric keys, otherwise the key followed by ':'. */
    JIterator.prototype.KeyDots = function () {
        var key = this._current.key;
        return (typeof (key) == "number") ? "" : (key + ':');
    };

    /** Value stored in the current node. */
    JIterator.prototype.Value = function () { return this._current.value; };

    /** Puts the cursor back on the root node and sets Level to 0. */
    JIterator.prototype.Reset = function () {
        this._current = this._root;
        this.Level = 0;
    };

    /**
     * Path from the top of the tree to the current node as an array of keys;
     * keys of container nodes get a "[]" (array) or "{}" (object) suffix.
     */
    JIterator.prototype.RawPath = function () {
        var steps = [];
        for (var level = this._current; level != null; level = level.parent) {
            if (level.value instanceof Object) {
                steps.push(level.key + (level.value instanceof Array ? "[]" : "{}"));
            } else {
                steps.push(level.key);
            }
        }
        return steps.reverse();
    };

    /**
     * Path from the top of the tree to the current node as an array of steps.
     *
     * With a truthy `raw`, container nodes contribute only their non-numeric
     * keys, so array indices of containers are dropped. Otherwise numeric keys
     * become "[n]" and named containers become "key[size]" / "key{size}",
     * where a directly following "[n]" step is folded in as "size.n".
     * Leaf nodes contribute their key, or "[n]" for numeric keys, in both
     * modes.
     *
     * @param raw Truthy to get the index-free form described above.
     * @returns {Array} Steps ordered from the top of the tree to the current node.
     */
    JIterator.prototype.Path = function (raw) {
        var steps = [];
        for (var level = this._current; level != null; level = level.parent) {
            var numericKey = typeof (level.key) == "number";
            if (!(level.value instanceof Object)) {
                steps.push(numericKey ? '[' + level.key + ']' : level.key);
            } else if (raw) {
                if (!numericKey) steps.push(level.key);
            } else if (numericKey) {
                steps.push('[' + level.key + ']');
            } else {
                var size = 0;
                for (var items = level.node; items; items = items.next) size++;
                var type = (level.value instanceof Array ? "[]" : "{}");
                var prev = steps[steps.length - 1];
                if (prev && prev[0] == '[' && prev[prev.length - 1] == ']') {
                    var index = prev.slice(1, -1);
                    // Global isNaN on purpose: it coerces the index text to a number.
                    if (!isNaN(index)) {
                        steps.pop();
                        size += '.' + index;
                    }
                }
                steps.push(level.key + type[0] + size + type[1]);
            }
        }
        return steps.reverse();
    };

    /**
     * Advances the cursor one step in depth-first order.
     *
     * @returns {number} 1 when moved to the first child, 2 when moved to the
     *   next sibling, 3 when climbed up and moved to an ancestor's next
     *   sibling, 0 when there is nothing left (the cursor is then null).
     */
    JIterator.prototype.DepthFirst = function () {
        if (this._current == null) return 0; // exit sign
        if (this._current.node != null) {
            this._current = this._current.node;
            this.Level++;
            if (this._maxLevel < this.Level) this._maxLevel = this.Level;
            return 1; // moved down
        } else if (this._current.next != null) {
            this._current = this._current.next;
            return 2; // moved right
        } else {
            while (this._current != null) {
                if (this._current.next != null) {
                    this._current = this._current.next;
                    return 3; // returned up & moved next
                }
                this.Level--;
                this._current = this._current.parent;
            }
        }
        return 0; // exit sign
    };

    /**
     * Advances the cursor one step towards a level-by-level walk, reusing
     * DepthFirst() to locate the next node on the wanted level.
     *
     * NOTE(review): the branch that steps into `node` does not change Level,
     * and some paths return null or undefined rather than 0; kept as in the
     * original implementation - confirm the intended contract before relying
     * on the return value.
     */
    JIterator.prototype.BreadthFirst = function () {
        if (this._current == null) return 0; // exit sign
        if (this._current.next) {
            this._current = this._current.next;
            return 1; // moved right
        } else if (this._current.parent) {
            var level = this.Level;
            while (this.DepthFirst() && level != this.Level);
            if (this._current) return 2; // returned up & moved next
            do {
                this.Reset();
                level++;
                while (this.DepthFirst() && level != this.Level);
                if (this._current) return 3; // returned up & moved next
            } while (this._maxLevel >= level);
            return this._current != null ? 3 : 0;
        } else if (this._current.node) {
            this._current = this._current.node;
            return 3;
        } else if (this._current.pred) {
            while (this._current.pred) this._current = this._current.pred;
            while (this._current && !this._current.node) this._current = this._current.next;
            if (!this._current) return null;
            else return this.DepthFirst();
        }
    };

    /**
     * Collects the current node and its following siblings into an object
     * keyed by node key. Leaves map to their value; container nodes map to the
     * node itself, or to their first child when the stored value has length 0.
     * Returns an empty object when there is no current node.
     */
    JIterator.prototype.ReadArray = function () {
        var retVal = {};
        for (var item = this._current; item != null; item = item.next) {
            if (item.value instanceof Object) {
                if (item.value.length == 0) retVal[item.key] = item.node;
                else retVal[item.key] = item;
            } else retVal[item.key] = item.value;
        }
        return retVal;
    };

    /**
     * Searches depth-first, starting at the current node, for a node whose key
     * loosely equals `key` (so "0" matches the numeric index 0).
     *
     * The cursor position and Level are restored before returning.
     *
     * @returns The matching node, or null when the walk ends without a match.
     */
    JIterator.prototype.FindKey = function (key) {
        var pos = this._current, startLevel = this.Level;
        while (this._current && this._current.key != key) this.DepthFirst();
        // The loop stops either on a match or with no current node left.
        var retVal = this._current ? this._current : null;
        this._current = pos;
        this.Level = startLevel;
        return retVal;
    };

    return JIterator;
})();

// Build an iterator over the sample document and flatten it; traverse() also
// prints the flattened result to the console.
traverse(new JIterator(src));

您的单行 JSON 版本与多行版本不同;这里使用下面这个版本,其结果看起来正是您需要的(attrs 已更改,并且 text 从根节点移到了 foo 之下):

{
  "foo": {
    "bar": {
      "type": "someType",
      "id": "ga241ghs"
    },
    "tags": [
      {
        "@tagId": "123",
        "tagAttributes": {
          "attr1": "AAA",
          "attr2": "111"
        }
      },
      {
        "@tagId": "456",
        "tagAttributes": {
          "attr1": "BBB",
          "attr2": "222"
        }
      }
    ],
    "text": "My text"
  }
}

弄清楚了如何直接在 Logstash 中使用 Ruby 过滤器来完成此操作 - 对于将来所有搜索此内容的人来说,这里是一个关于如何为 @tagId 执行此操作的示例:

filter {
        # Collect the @tagId of every element of [foo][tags] into one array and
        # store it in a single top-level field literally named "foo.tags.tagId".
        ruby { code => '
            tags = event.get( "[foo][tags]" )
            # Events without a [foo][tags] array are left untouched instead of
            # raising NoMethodError on nil.
            if tags.is_a?(Array)
                # Elements that are not hashes contribute nil, as a failed
                # field lookup would.
                tagId_array = tags.map { |tag| tag.is_a?(Hash) ? tag["@tagId"] : nil }
                event.set( "foo.tags.tagId", tagId_array )
            end
        '
        }
}