使用下划线或 lodash 或 vanilla js,如何找出数组中哪个元素出现最频繁?

Using underscore or lodash or vanilla js, how to figure out which element of an array occurs most frequently?

如果我有一个包含对象的数组,我如何确定哪个对象在该数组中最常出现?

假设数组是一个包含产品的数组,每个产品都是一个具有唯一 ID 的对象(每个产品唯一,而不是数组中的每个元素):

const products = [
  {
    "itemId": "573412ab18c933d0085ca8b2",
    "price": 20,
    "cartQuantity": 1,
    "itemName": "Tea"
  },
  {
    "itemId": "573c82bd5c5ade1100532ec0",
    "price": 100,
    "cartQuantity": 1,
    "itemName": "Carpet"
  },
  {
    "itemId": "5734126218c933d0085ca8b0",
    "_id": "57608d4187faf12708605360",
    "price": 15,
    "cartQuantity": 1,
    "itemName": "Black Coffee"
  },
  {
    "itemId": "573412dd18c933d0085ca8b3",
    "_id": "57608d3d87faf12708605362",
    "price": 50,
    "cartQuantity": 1,
    "itemName": "Nyquil"
  },
  {
    "itemId": "573412ab18c933d0085ca8b2",
    "price": 20,
    "cartQuantity": 1,
    "itemName": "Tea"
  }
];

在上面的代码片段中,您可以看到 "Tea" 出现的频率最高。我目前缺乏必要的算法知识来弄清楚如何从该数组中提取 "Tea" 对象作为最常出现的元素。此外,如果有人可以在这里帮助我,请务必记住一个例外情况,即数组中的两个元素出现的次数相同,并且它们 "tie" 最频繁。

如果这对某人来说是一个容易解决的问题,请对我放轻松并在您的耐心允许的范围内尽可能详细地解释您是如何解决这个问题的,因为我还在学习。欢迎使用下划线或 lodash 函数回答!

我认为最简单的方法是遍历数组并将当前产品的每个 itemId 作为键添加到对象中(包含计数为 1 的值和数组中的索引).

如果该元素已经在对象中,则将计数值加 1。最后您只需在对象中找到最大计数值。

解决方案:

function mostOccuringProduct(productList) {
  const frequencies = {};

  // iterating through each product counting the occurrence of each itemId
  productList.forEach((product, i) => {
    if (product.itemId in frequencies) {
      frequencies[product.itemId].count++;
    } else {
      frequencies[product.itemId] = { count: 1, index: i };
    }
  })

  // find max number of occurences of any 1 item
  const maxOccurences = _.max(frequencies, item => item.count);
  // get array of items that tie for this maxOccurences #
  const itemsMatchingMaxOccurences = _.filter(frequencies, item => item.count === maxOccurences.count);
  // create array of just the indicies of the items that tie for the maxNumber of occurences
  const indicesOfMaxOccuringItems = _.map(itemsMatchingMaxOccurences, frequency => frequency.index);

  // initialize an array to hold all items that tie for max occurencies
  const products = [];
  // push items to this array
  indicesOfMaxOccuringItems.forEach(index => products.push(productList[index]));


  // return an array of all products that occur most frequently.
  // or just return the product that occurs most IF there is only one.
  return products.length > 1 ? products : products[0];
}

此解决方案将 return 列表中出现次数最多的一组产品。如果只有一项,它将 return 1 项。

您可以像这样格式化输出:

[
  {
    "itemId": "573412ab18c933d0085ca8b2",
    "ocurrences": 2
  },
  {
    "itemId": "573c82bd5c5ade1100532ec0",
    "ocurrences": 1
  },
  ...
  ]

什么是第一个元素在数组中重复次数最多的排序数组。按照排序,您可以轻松检查第二个或下一个元素是否具有相同的出现次数。

要实现这一点,您需要这样的东西:

let getOcurrences = array =>{
  let ocurrences = {};
  array.forEach(item=>{
    if (!ocurrences[item.itemId])
      ocurrences[item.itemId] = 1;
    else 
      ocurrences[item.itemId] += 1;
  });
  return Object.keys(ocurrences)
    .map(itemId => ({
      itemId : itemId,
      ocurrences : ocurrences[itemId]
    }))
    .sort((a, b)=> b.ocurrences - a.ocurrences);
};

const products = [
  {
    "itemId": "573412ab18c933d0085ca8b2",
    "price": 20,
    "cartQuantity": 1,
    "itemName": "Tea"
  },
  {
    "itemId": "573c82bd5c5ade1100532ec0",
    "price": 100,
    "cartQuantity": 1,
    "itemName": "Carpet"
  },
  {
    "itemId": "5734126218c933d0085ca8b0",
    "_id": "57608d4187faf12708605360",
    "price": 15,
    "cartQuantity": 1,
    "itemName": "Black Coffee"
  },
  {
    "itemId": "573412dd18c933d0085ca8b3",
    "_id": "57608d3d87faf12708605362",
    "price": 50,
    "cartQuantity": 1,
    "itemName": "Nyquil"
  },
  {
    "itemId": "573412ab18c933d0085ca8b2",
    "price": 20,
    "cartQuantity": 1,
    "itemName": "Tea"
  }
];
console.log(getOcurrences(products));

我建议在迭代产品并跟踪最频繁的产品时使用哈希图来计算出现次数:

function getMostFrequent(array, identityFn) {
  var counts = {}, max = 0, result;
  
  array.forEach(element => {
    var id = (identityFn && identityFn(element)) || element,
        count = counts[id] = (counts[id] || 0) + 1;
    if (count > max) {
      max = count;
      result = element;
    }
  });
  return result;
}

const products = [{"itemId": 1}, {"itemId": 2}, {"itemId": 2}, {"itemId": 3}];

var mostFrequent = getMostFrequent(products, p => p["itemId"]);

console.log(mostFrequent);

如果您想要 所有 产品并列最频繁而不只是第一个,请将结果声明为 Array 并将元素推送到 count == max

getMostFrequent()的解释:

我们遍历给定 array 中的所有元素,对于每个元素我们...

  • 通过提供的 identityFn 回调函数获取其 id
  • 为此 id 初始化或增加计数器 counts[id] = (counts[id] || 0) + 1;
  • 最后与迄今为止出现频率最高的元素进行比较,如果出现频率更高则进行更新。

这是我的 O(n) 解决方案。我们创建一个散列 table 并将散列 table 中的计数保存在一个名为索引的单独 属性 中,它是一个数组。当一个项目第一次出现时,它的索引值(在产品数组中)被插入到索引数组索引位置 1。当另一个项目第一次出现时,它会覆盖索引位置 1。因此,当一个项目第二次出现时,它被插入到指数 [2] 等等。最后 indices.pop() 会给我们最频繁的产品的项目索引,我们不会变形产品数组。

var products = [
  {
    "itemId": "573412ab18c933d0085ca8b2",
    "price": 20,
    "cartQuantity": 1,
    "itemName": "Tea"
  },
  {
    "itemId": "573c82bd5c5ade1100532ec0",
    "price": 100,
    "cartQuantity": 1,
    "itemName": "Carpet"
  },
  {
    "itemId": "5734126218c933d0085ca8b0",
    "_id": "57608d4187faf12708605360",
    "price": 15,
    "cartQuantity": 1,
    "itemName": "Black Coffee"
  },
  {
    "itemId": "573412dd18c933d0085ca8b3",
    "_id": "57608d3d87faf12708605362",
    "price": 50,
    "cartQuantity": 1,
    "itemName": "Nyquil"
  },
  {
    "itemId": "573412ab18c933d0085ca8b2",
    "price": 20,
    "cartQuantity": 1,
    "itemName": "Tea"
  }
],
lut = products.reduce((p,c,i) => { ++p[c.itemId] || (p[c.itemId] = 1);
                                   p.indices[p[c.itemId]] = i;
                                   return p;
                                 },{indices:[]});
console.log(lut);
console.log(products[lut.indices[lut.indices.length-1]]);