如何在不使用可变操作(mutation)的情况下实现高效的 group by 函数?

How to create an efficient group by function without mutation?

有没有一种方法可以在不使用可变操作(mutation)的情况下高效地实现 group by 函数?

朴素的实现:

// Sample input: one record per message, carrying its insertion date.
var messages = [
  {insertedAt: "2021-01-10"},
  {insertedAt: "2021-01-12"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-14"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-16"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-18"},
  {insertedAt: "2021-01-18"},
];

// Naive, mutation-free grouping: every step returns a brand-new array of
// { date, count } buckets instead of updating the previous one in place.
var messagesGroupedByDate = messages.reduce((groups, { insertedAt }) => {
  const matchesDate = (group) => group.date === insertedAt;

  // First sighting of this date: append a fresh bucket (concat copies the array).
  if (!groups.some(matchesDate)) {
    return groups.concat([{ date: insertedAt, count: 1 }]);
  }

  // Date already present: rebuild the array, replacing only the matching bucket.
  return groups.map((group) =>
    matchesDate(group)
      ? { date: group.date, count: (group.count + 1) | 0 } // `| 0` coerces to a 32-bit integer
      : group
  );
}, []);


console.log(messagesGroupedByDate);

为便于讨论,这里没有必要把它写得更通用。我面临的问题是存在三重循环:外层的 reduce,以及每次迭代中对累加数组执行的 some 和 map(或 concat)。

如果在 ReScript 中确实没有好的办法让它变得高效,我总是可以改用原生 JavaScript 来实现这个功能,但我很好奇能否在不使用可变操作的情况下高效地做到这一点。

您可以先按日期构建一个计数对象,然后根据需要使用 Object.entries 和 Array.map 将其转换为对象数组,这样分组会简单得多:

// Sample input: one record per message, carrying its insertion date.
var messages = [
  {insertedAt: "2021-01-10"},
  {insertedAt: "2021-01-12"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-14"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-16"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-18"},
  {insertedAt: "2021-01-18"},
];

// Step 1: tally the messages into a { date: count } lookup object.
const countsByDate = {};
for (const { insertedAt } of messages) {
  // Seed the counter the first time a date shows up, then bump it.
  if (!countsByDate[insertedAt]) {
    countsByDate[insertedAt] = 0;
  }
  countsByDate[insertedAt]++;
}

// Step 2: reshape the lookup object into an array of { date, count } records.
var messagesGroupedByDate = Object.entries(countsByDate).map((entry) => {
  const [date, count] = entry;
  return { date, count };
});

console.log(messagesGroupedByDate);

您还可以直接根据 messages 中的值创建一个对象(每个日期的初始计数为 0),然后再遍历 messages 来更新计数:

// Sample input: one record per message, carrying its insertion date.
var messages = [
  {insertedAt: "2021-01-10"},
  {insertedAt: "2021-01-12"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-14"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-16"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-18"},
  {insertedAt: "2021-01-18"},
];

// Pass 1: seed every distinct date with a zero count
// (repeated dates simply overwrite the same key).
const data = Object.fromEntries(
  messages.map((message) => [message.insertedAt, 0])
);

// Pass 2: bump the counter belonging to each message's date.
for (const message of messages) {
  data[message.insertedAt]++;
}

// Pass 3: reshape { date: count } into an array of { date, count } records.
const messagesGroupedByDate = Object.entries(data).map((entry) => {
  const [date, count] = entry;
  return { date, count };
});

console.log(messagesGroupedByDate);

只需将数据添加到 Map(),然后转换为数组,再映射为对象数组即可。按照您的要求,它不会修改原始的 messages(只有局部的 Map 会被更新)。

这段代码还可以进一步简化,但现在是凌晨 5:00,我的大脑已经睡着了。

// Sample input: one record per message, carrying its insertion date.
var messages = [
  {insertedAt: "2021-01-10"},
  {insertedAt: "2021-01-12"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-14"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-16"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-18"},
  {insertedAt: "2021-01-18"},
];

// date -> number of messages inserted on that date
const mapped = new Map();

for (const { insertedAt } of messages) {
  // A date that has not been seen yet counts as zero sightings so far.
  const seenSoFar = mapped.get(insertedAt) ?? 0;
  mapped.set(insertedAt, seenSoFar + 1);
}

// Spreading a Map yields its [key, value] pairs in insertion order.
const msgArr = [...mapped];

// Turn each [date, count] pair into a { date, count } record.
const final = msgArr.map((pair) => ({ date: pair[0], count: pair[1] }));

console.log(final);

你需要一个映射(map)作为中间数据结构:

{"2021-01-18": 2, /*…*/}

然后将其拆解成键值对,再把这些键值对重新映射为对象:

/**
 * Counts how many records share each `insertedAt` value.
 *
 * The accumulator is updated in place inside the reduce callback; the input
 * array and its records are never modified.
 *
 * @param {Array<{insertedAt: string}>} xs - Records to tally.
 * @returns {Array<{date: string, count: number}>} One entry per distinct
 *   `insertedAt` value, in the key order produced by `Object.entries`.
 */
const count =
  xs =>
    Object.entries(
      xs.reduce((acc, {insertedAt: k}) => {
        acc[k] = (acc[k] ?? 0) + 1;
        return acc;
        // Null-prototype accumulator: keys such as "constructor" or "__proto__"
        // must not resolve to members inherited from Object.prototype.
      }, Object.create(null)))
          .map(([k, v]) => ({date: k, count: v}));

/**
 * Pure variant of `count`: tallies records per `insertedAt` value without
 * mutating anything, not even the accumulator. Every reduce step spreads the
 * previous accumulator into a brand-new object, so each step copies all the
 * keys collected so far.
 *
 * @param {Array<{insertedAt: string}>} xs - Records to tally.
 * @returns {Array<{date: string, count: number}>} One entry per distinct
 *   `insertedAt` value, in the key order produced by `Object.entries`.
 */
const count_pure =
  xs =>
    Object.entries(
      xs.reduce((acc, {insertedAt: k}) =>
        ({
          ...acc,
          // Read only own properties: inherited members such as "constructor"
          // or "__proto__" are not counts and must be treated as 0.
          [k]: (Object.prototype.hasOwnProperty.call(acc, k) ? acc[k] : 0) + 1,
        }), {}))
          .map(([k, v]) => ({date: k, count: v}));


// Run the mutating-accumulator variant first and print its result...
const groupedByCount = count(messages);
console.log(groupedByCount);
// ...then the fully pure variant on the same input.
const groupedByCountPure = count_pure(messages);
console.log(groupedByCountPure);
<script>
// Sample input for the snippet: one record per message, each carrying its
// insertion date as a "YYYY-MM-DD" string. Several dates repeat on purpose so
// that the grouped counts differ.
var messages = [
  {insertedAt: "2021-01-10"},
  {insertedAt: "2021-01-12"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-14"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-16"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-18"},
  {insertedAt: "2021-01-18"},
]
</script>

执行此操作时通常会使用扩展运算符(...),但随着需要处理的项目数量增加,这种做法可能很快就会变得效率低下。请参阅我就该主题写的 gist:https://gist.github.com/customcommander/97eb4b3f1600773db59406d39f3f9cd7

虽然这个问题有点老了,但我还是想为未来的读者分享我的代码。下面的代码是用 ReScript 编写的,并且完全不可变,因为我使用了 ReScript 中的 Immutable Map。

// Record describing one message; `insertedAt` holds its date as a string.
type message = {insertedAt: string}

// Sample input: one record per message; several dates repeat.
let messages = [
  {insertedAt: "2021-01-10"},
  {insertedAt: "2021-01-12"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-13"},
  {insertedAt: "2021-01-14"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-15"},
  {insertedAt: "2021-01-16"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-17"},
  {insertedAt: "2021-01-18"},
  {insertedAt: "2021-01-18"},
]

// Fold the messages into an immutable, string-keyed map of date -> count.
// Belt.Map.String.set does not modify its argument: each call returns a new map.
let countsByDate = Belt.Array.reduce(messages, Belt.Map.String.empty, (counts, message) => {
  // A date that is not in the map yet counts as zero sightings so far.
  let seenSoFar = Belt.Map.String.getWithDefault(counts, message.insertedAt, 0)
  Belt.Map.String.set(counts, message.insertedAt, seenSoFar + 1)
})

// Flatten the map into an array of (date, count) tuples and log it.
Js.log(Belt.Map.String.toArray(countsByDate))

可以在 ReScript Playground 中运行这段代码。关于 ReScript 中 Immutable Map 的更多信息,请参阅 Belt.Map 的官方文档。

输出:

[ [ '2021-01-10', 1 ],
  [ '2021-01-12', 1 ],
  [ '2021-01-13', 3 ],
  [ '2021-01-14', 1 ],
  [ '2021-01-15', 2 ],
  [ '2021-01-16', 1 ],
  [ '2021-01-17', 3 ],
  [ '2021-01-18', 2 ] ]