如何从一个对象数组中提取所有可能匹配的对象数组？

Question

我有一组对象，例如

var arr = [
    {"a": "x"},
    {"b": "0"},
    {"c": "k"},
    {"a": "nm"},
    {"b": "765"},
    {"ab": "i"},
    {"bc": "x"},
    {"ab": "4"},
    {"abc": "L"}
];

假设我只对键对应于 var input = ["ab", "bc"] 的对象感兴趣。这意味着我想通过以下方式提取 所有可能的 具有 result[i].length == 2 的子数组：

var result = [
    [{"ab": "i"}, {"bc": "x"}],
    [{"ab": "4"}, {"bc": "x"}] // or [{"bc": "x"}, {"ab": "4"}]
];

——也就是说，子数组中对象的顺序绝对不重要：我只对每个子数组包含两个对象这一事实感兴趣——{"ab": ...} 和 {"bc": ...}。

如果我对var input = ["a","a","ab"]感兴趣，结果应该是这样的：

var result = [
    [{"a": "x"}, {"a": "nm"}, {"ab": "i"}],
    [{"a": "x"}, {"a": "nm"}, {"ab": "4"}]
];

如果没有阶乘级的计算量，我找不到实现预期结果的方法（假设 input.length 可能比 2 或 3 大得多——甚至 15-20 可能还不够），这在物理上是不可能的。有没有办法有一些合理的性能来解决这样的问题？
重要说明：是的，显然，对于相对较大的 input.length 值，理论上可能有非常多的可能组合，但实际上，result.length总是相当小（可能是 100-200，我什至怀疑它能达到 1000...）。但是为了安全起见，我只想设置一些限制（比如 1000），这样一旦 result.length 达到这个限制，函数就会 returns 当前 result 并停止。

Answer 1

您可以使用循环手动执行此操作，但您也可以使用内置函数 Array.prototype.filter() to filter the array and Array.prototype.indexOf 检查元素是否在另一个数组中：

var filtered = arr.filter(function(pair){
    return input.indexOf(Object.keys(pair)[0]) != -1;
});

这会为您提供仅包含符合条件的对象的数组。

现在数学语言中result数组的东西叫做"combinations"。这正是你想要的，所以我不会在这里描述它。这里给出了一种生成数组（集合）所有组合的方法——

下面是这个函数的使用方法：

// function assumes each element is array, so we need to wrap each one in an array 
for(var i in filtered) {
    filtered[i] = [filtered[i]];
}
var result = getCombinations(filtered, input.length /* how many elements in each sub-array (subset) */);

Object.keys(pair)[0] 是一种无需迭代即可获取对象第一个键的方法 ()

Answer 2

按字母顺序 arr 和 input 排序 ，这是 O(nlogn)，如果您能够在构建数组时做到这一点，你可能会受益。

让我用一个例子来解释我的想法：

var arr = [
    {"a": "x"},
    {"ab": "i"},
    {"ab": "4"},
    {"abc": "L"}
    {"bc": "x"},
];
var input = ["ab", "bc"];

在 arr 中搜索 input[0]（线性或什至使用二进制搜索来加快速度）。标记索引。

在arr中搜索input[1]，但只考虑arr的子数组，从上一步标记的索引，到它的末尾

如果您找到 input 的所有元素，然后将其推送到 results（您可以为此保留一个临时对象）。

现在，我们必须再次搜索 input[0]，因为可能有两个或更多条目共享该密钥。您将存储我之前提到的那个索引，这样您就可以从该索引开始再次搜索，并且由于 arr 已排序，您将只需要检查下一个元素，依此类推。

时间复杂度：

对数组进行排序（假设您在构建它们时无法对它们进行排序）：O(nlogn)，其中 n 是 arr 具有的元素数。

在 arr 中对 input[0] 进行二进制搜索：O(logn)

现在搜索的下一步（对于 input[1]）比 arr 的长度小得多，所以 非常悲观 边界将是 O (n).实际上，它当然不会是 O(n)，如果你愿意，你也可以对 input[1] 进行二进制搜索，这将花费 O(logm)，其中 m 是arr[index_stored: -1].

在这一点上，我们继续寻找 input[0] 的下一次出现，当然如果有的话，但是因为我们已经存储了索引，所以我们确切地知道从哪里开始搜索，我们必须检查只有下一个元素，这是一个不变的成本，因此 O(1).

然后我们对input[1]做同样的事情，又便宜了。

现在，这完全取决于 input 的长度，即 k，似乎 k < n，以及您有多少次出现的密钥，对吗？

但假设在正常平均情况下，整个过程的时间复杂度为：

O(nlogn)

但是，请注意，您必须支付一些额外的内存来存储索引，这取决于键出现的次数。使用蛮力 aglrotihm，这会更慢，您不需要为内存支付任何额外费用。

Answer 3

也许不是最佳方式。我可能会使用一些库作为最终解决方案，但这里有一些步骤可以帮助您找到一条快乐的道路。我会尽快添加一些评论。

为源数组中的单个键生成映射（即在哪些索引处看到它，因为我们可能有多个条目）

   function getKeyMap( src, key ){
        var idx_arr = [];
        src.forEach(function(pair,idx){ if(Object.keys(pair)[0] === key){ idx_arr.push(idx)} });
        return idx_arr;
    }

并且必须为您希望成为过滤一部分的所有键完成此映射

function getKeysMap( src, keys ){
    var keys_map = [];
    keys.forEach(function(aKey){
        var aMap = getKeyMap(src,aKey);
        if( aMap.length ){
            keys_map.push(aMap);
        }

    });
    // if keys map lenght is less then keys length then you should throw an exception or something
    return keys_map;
}

然后你想建立所有可能的组合。我在这里使用递归，也许不是最佳方式

function buildCombos( keys_map, carry, result ){
    if( keys_map.length === 0){
        result.push(carry);
        return;
    }
    var iter = keys_map.pop();
    iter.forEach(function(key){
        var cloneMap = keys_map.slice(0);
        var clone = carry.slice(0);
        clone.push(key);
        buildCombos(cloneMap, clone, result);
    });
}

然后我需要过滤结果以排除重复条目和具有重复索引的条目

function uniqueFilter(value, index, self) {
    return self.indexOf(value) === index;
}

function filterResult( map ){
    var filter = {};
    map.forEach(function(item){
        var unique = item.filter( uniqueFilter );
        if(unique.length === item.length){
            filter[unique.sort().join('')]=true;}
        });
    return filter;
}

然后我简单地将生成的过滤后的地图解码为原始数据

function decodeMap( map,src ){
    var result = [];
    Object.keys(map).forEach(function(item){
        var keys = item.split('');
        var obj = [];
        keys.forEach(function( j ){
            obj.push( src[j])
        });
        result.push(obj);
    });
    return result;
}

包装器

function doItAll(arr, keys){
    // Get map of they keys in terms of numbers
    var maps = getKeysMap( arr, keys);
    // build combinations out of key map
    var combos = [];
    buildCombos(maps,[],combos);
    // filter results to get rid of same sequences and same indexes ina sequence
    var map = filterResult(combos);
    // decode map into the source array
    return decodeMap( map, arr )
}

用法：

var res = doItAll(arr, ["a","a","ab"])

Answer 4

看到问题，有点像笛卡尔积。事实上，如果在操作之前，稍微修改一下数据模型，预期的结果几乎在所有情况下都是笛卡尔积。但是，有一个案例（您提供的第二个示例）需要特殊处理。这是我所做的：

稍微调整一下模型数据（这只会完成一次）以获得适合应用笛卡尔积的东西。
处理 "special case" 有多个参数请求相同字符串的问题。

所有重要的逻辑都在 cartessianProdModified 中。代码中的重要部分已被注释。希望它能帮助您解决问题或至少给您一些想法。

这是一个 fiddle，这是代码：

var arr = [
    {"a": "x"},
    {"b": "0"},
    {"c": "k"},
    {"a": "nm"},
    {"b": "765"},
    {"ab": "i"},
    {"bc": "x"},
    {"ab": "4"},
    {"abc": "L"},
    {"dummy": "asdf"}
];

// Utility function to be used in the cartessian product
function flatten(arr) {
    return arr.reduce(function (memo, el) {
        return memo.concat(el);
    }, []);
}

// Utility function to be used in the cartessian product
function unique(arr) {
    return Object.keys(arr.reduce(function (memo, el) {
        return (memo[el] = 1) && memo;
    }, {}));
}

// It'll prepare the output in the expected way
function getObjArr(key, val, processedObj) {
    var set = function (key, val, obj) {
        return (obj[key] = val) && obj;
    };
    // The cartessian product is over so we can put the 'special case' in an object form so that we can get the expected output.
    return val !== 'repeated' ? [set(key, val, {})] : processedObj[key].reduce(function (memo, val) {
        return memo.concat(set(key, val, {}));
    }, []);
}

// This is the main function. It'll make the cartessian product.
var cartessianProdModified = (function (arr) {
    // Tweak the data model in order to have a set (key: array of values)
    var processedObj = arr.reduce(function (memo, obj) {
        var firstKey = Object.keys(obj)[0];
        return (memo[firstKey] = (memo[firstKey] || []).concat(obj[firstKey])) && memo;
    }, {});

    // Return a function that will perform the cartessian product of the args.
    return function (args) {
        // Spot repeated args.
        var countArgs = args.reduce(function (memo, el) {
                return (memo[el] = (memo[el] || 0) + 1) && memo;
            }, {}),
            // Remove repeated args so that the cartessian product works properly and more efficiently.
            uniqArgs = unique(args);

        return uniqArgs
                .reduce(function (memo, el) {
                    return flatten(memo.map(function (x) {
                        // Special case: the arg is repeated: we have to treat as a unique value in order to do the cartessian product properly
                        return (countArgs[el] > 1 ? ['repeated'] : processedObj[el]).map(function (y) {
                            return x.concat(getObjArr(el, y, processedObj));
                        });
                    }));
                }, [[]]);
    };
})(arr);

console.log(cartessianProdModified(['a', 'a', 'ab']));

Answer 5

如果您能够使用 ES6 功能，则可以使用生成器来避免必须创建大型中间数组。看起来您想要一组集合，其中的行仅包含唯一值。正如其他人也提到的那样，您可以从 cartesian product 个与您的 input 键匹配的对象开始：

'use strict';

function* product(...seqs) {
    const indices = seqs.map(() => 0),
          lengths = seqs.map(seq => seq.length);

    // A product of 0 is empty
    if (lengths.indexOf(0) != -1) {
        return;
    }

    while (true) {
        yield indices.map((i, iseq) => seqs[iseq][i]);
        // Update indices right-to-left
        let i;
        for (i = indices.length - 1; i >= 0; i--) {
            indices[i]++;
            if (indices[i] == lengths[i]) {
                // roll-over
                indices[i] = 0;
            } else {
                break;
            }
        }
        // If i is negative, then all indices have rolled-over
        if (i < 0) {
            break;
        }
    }
}

生成器只保存迭代之间的索引并按需生成新行。要实际加入与您的 input 键匹配的对象，您首先必须创建一个查找：

function join(keys, values) {
    const lookup = [...new Set(keys)].reduce((o, k) => {
        o[k] = [];
        return o;
    }, {});

    // Iterate over array indices instead of objects them selves.
    // This makes producing unique rows later on a *lot* easier.
    for (let i of values.keys()) {
       const k = Object.keys(values[i])[0];
       if (lookup.hasOwnProperty(k)) {
           lookup[k].push(i);
       } 
    }

    return product(...keys.map(k => lookup[k]));
}

然后您需要过滤掉包含重复值的行：

function isUniq(it, seen) {
    const notHadIt = !seen.has(it);
    if (notHadIt) {
        seen.add(it);
    }
    return notHadIt;
}

function* removeDups(iterable) {
    const seen = new Set();
    skip: for (let it of iterable) {
        seen.clear();
        for (let x of it) {
            if (!isUniq(x, seen)) {
                continue skip;
            }
        }
        yield it;
    }
}

还有全局唯一行（集合方面）：

function* distinct(iterable) {
    const seen = new Set();
    for (let it of iterable) {
        // Bit of a hack here, produce a known order for each row so
        // that we can produce a "set of sets" as output. Rows are
        // arrays of integers.
        const k = it.sort().join();
        if (isUniq(k, seen)) {
            yield it;
        }
    }
}

把它绑起来：

function* query(input, arr) {
    for (let it of distinct(removeDups(join(input, arr)))) {
        // Objects from rows of indices
        yield it.map(i => arr[i]);
    }
}

function getResults(input, arr) {
    return Array.from(query(input, arr));
}

在行动：

const arr = [
    {"a": "x"},
    {"b": "0"},
    {"c": "k"},
    {"a": "nm"},
    {"b": "765"},
    {"ab": "i"},
    {"bc": "x"},
    {"ab": "4"},
    {"abc": "L"}
];

console.log(getResults(["a", "a", "ab"], arr));
/*
[ [ { a: 'x' }, { a: 'nm' }, { ab: 'i' } ],
  [ { a: 'x' }, { a: 'nm' }, { ab: '4' } ] ]
*/

和强制性的 jsFiddle。

如何从一个对象数组中提取所有可能匹配的对象数组？

How to extract all possible matching arrays of objects from one array of objects?

javascript

arrays

algorithm

search

sub-array