使用 ramdajs 创建前 5 个聚合

creating top 5 aggregation with ramdajs

我想转换这个输入

[
        { country: 'France', value: 100 },
        { country: 'France', value: 100 },
        { country: 'Romania', value: 500 },
        { country: 'England', value: 400 },
        { country: 'England', value: 400 },
        { country: 'Spain', value: 130 },
        { country: 'Albania', value: 4 },
        { country: 'Hungary', value: 3 }
]

进入输出

[
      { country: 'England', value: 800 },
      { country: 'Romania', value: 500 },
      { country: 'France', value: 200 },
      { country: 'Spain', value: 130 },
      { country: 'Other', value: 8 }
]

这基本上是 前 4 个国家和其他国家的价值总和

我正在使用 javascript 和 ramdajs, and I only managed to do it in a somehow cumbersome way so far

我正在寻找一个优雅的解决方案:是否有任何函数式程序员能够提供他们的解决方案?或者有什么有用的 ramda 方法的想法吗?

我认为你可以稍微简化 groupOthersKeeping 通过在减少数组之前拆分数组,就 ramda 而言,它可能如下所示:

const groupOthersKeeping = contriesToKeep => arr => [
    ...slice(0, contriesToKeep, arr),
    reduce(
      (acc, i) => ({ ...acc, value: acc.value + i.value }),
      { country: 'Others', value: 0 },
      slice(contriesToKeep, Infinity, arr)
    )
 ]

使用更多的 ramda 函数但不确定哪个更好:

let country = pipe(
  groupBy(prop('country')),
  map(pluck('value')),
  map(sum)
)([
  { country: 'France', value: 100 },
  { country: 'France', value: 100 },
  { country: 'Romania', value: 500 },
  { country: 'England', value: 400 },
  { country: 'England', value: 400 },
  { country: 'Spain', value: 130 },
  { country: 'Albania', value: 4 },
  { country: 'Hungary', value: 3 }
]);

let splitCountry = pipe(
  map((k) => ({country: k, value: country[k]})),
  sortBy(prop('value')),
  reverse,
  splitAt(4)
)(keys(country));

splitCountry[0].push({country: 'Others', value: sum(map(prop('value'))(splitCountry[1]))});
splitCountry[0]

我尝试了一下,并尝试将它的功能用于大多数事情。并保持单一 pipe

const f = pipe(
  groupBy(prop('country')),
  map(map(prop('value'))),
  map(sum),
  toPairs(),
  sortBy(prop(1)),
  reverse(),
  addIndex(map)((val, idx) => idx<4?val:['Others',val[1]]),
  groupBy(prop(0)),
  map(map(prop(1))),
  map(sum),
  toPairs(),
  map(([a,b])=>({'country':a,'value':b}))
)

Ramda REPL


但是,我认为它没有任何可读性。

这是我的两分钱。

const a = [
    { country: 'France', value: 100 },
    { country: 'France', value: 100 },
    { country: 'Romania', value: 500 },
    { country: 'England', value: 400 },
    { country: 'England', value: 400 },
    { country: 'Spain', value: 130 },
    { country: 'Albania', value: 4 },
    { country: 'Hungary', value: 3 }
];

const diff = (a, b) => b.value - a.value;
const addValues = (acc, {value}) => R.add(acc,value);
const count = R.reduce(addValues, 0);
const toCountry = ({country}) => country;
const toCountryObj = (x) => ({'country': x[0], 'value': x[1] });
const reduceC = R.reduceBy(addValues, [], toCountry);

const [countries, others] = R.compose(
    R.splitAt(4), 
    R.sort(diff), 
    R.chain(toCountryObj), 
    R.toPairs, 
    reduceC)(a);

const othersArray = [{ 'country': 'Others', 'value': count(others) }];

R.concat(countries, othersArray);

Ramda REPL

(每一步都得到上一步的输出,最后会把所有东西放在一起。)

第 1 步:获取求和图

你可以这样改造:

[
  { country: 'France', value: 100 },
  { country: 'France', value: 100 },
  { country: 'Romania', value: 500 },
  { country: 'England', value: 400 },
  { country: 'England', value: 400 },
  { country: 'Spain', value: 130 },
  { country: 'Albania', value: 4 },
  { country: 'Hungary', value: 3 }
]

进入这个:

{
  Albania: 4,
  England: 800,
  France: 200,
  Hungary: 3,
  Romania: 500,
  Spain: 130
}

有了这个:

const reducer = reduceBy((sum, {value}) => sum + value, 0);
const reduceCountries = reducer(prop('country'));

第 2 步:将其转换回排序数组

[
  { country: "Hungary", value: 3 },
  { country: "Albania", value: 4 },
  { country: "Spain", value: 130 },
  { country: "France", value: 200 },
  { country: "Romania", value: 500 },
  { country: "England", value: 800 }
]

您可以使用:

const countryFromPair = ([country, value]) => ({country, value});
pipe(toPairs, map(countryFromPair), sortBy(prop('value')));

第 3 步:创建两个子组,non-top-4 个国家和前 4 个国家

[
  [
    { country: "Hungary", value: 3},
    { country: "Albania", value: 4}
  ],
  [
    { country: "Spain", value: 130 },
    { country: "France", value: 200 },
    { country: "Romania", value: 500 },
    { country: "England", value: 800 }
  ]
]

你可以用这个做什么:

splitAt(-4)

第四步:合并第一个子组

[
  [
    { country: "Others", value: 7 }
  ],
  [
    { country: "Spain", value: 130 },
    { country: "France", value: 200 },
    { country: "Romania", value: 500 },
    { country: "England", value: 800 }
  ]
]

有了这个:

over(lensIndex(0), compose(map(countryFromPair), toPairs, reduceOthers));

第 5 步:展平整个数组

[
  { country: "Others", value: 7 },
  { country: "Spain", value: 130 },
  { country: "France", value: 200 },
  { country: "Romania", value: 500 },
  { country: "England", value: 800 }
]

flatten

完整的工作示例

const data = [
  { country: 'France', value: 100 },
  { country: 'France', value: 100 },
  { country: 'Romania', value: 500 },
  { country: 'England', value: 400 },
  { country: 'England', value: 400 },
  { country: 'Spain', value: 130 },
  { country: 'Albania', value: 4 },
  { country: 'Hungary', value: 3 }
];

const reducer = reduceBy((sum, {value}) => sum + value, 0);
const reduceOthers = reducer(always('Others'));
const reduceCountries = reducer(prop('country'));
const countryFromPair = ([country, value]) => ({country, value});

const top5 = pipe(
  reduceCountries,
  toPairs,
  map(countryFromPair),
  sortBy(prop('value')),
  splitAt(-4),
  over(lensIndex(0), compose(map(countryFromPair), toPairs, reduceOthers)),
  flatten
);

top5(data)

这是一个方法:

const combineAllBut = (n) => pipe(drop(n), pluck(1), sum, of, prepend('Others'), of)

const transform = pipe(
  groupBy(prop('country')),
  map(pluck('value')),
  map(sum),
  toPairs,
  sort(descend(nth(1))),
  lift(concat)(take(4), combineAllBut(4)),
  map(zipObj(['country', 'value']))
)

const countries = [{ country: 'France', value: 100 }, { country: 'France', value: 100 }, { country: 'Romania', value: 500 }, { country: 'England', value: 400 }, { country: 'England', value: 400 }, { country: 'Spain', value: 130 }, { country: 'Albania', value: 4 }, { country: 'Hungary', value: 3 }]

console.log(transform(countries))
<script src="https://bundle.run/ramda@0.26.1"></script>
<script>
const {pipe, groupBy, prop, map, pluck, sum, of, prepend, toPairs, sort, descend, nth, lift, concat, take, drop, zipObj} = ramda
</script>

除了一条复杂的线 (lift(concat)(take(4), combineAllBut(4))) 和相关的辅助函数 (combineAllBut),这是一组简单的转换。该辅助函数在该函数之外可能没有用,因此将其内联为 lift(concat)(take(4), pipe(drop(4), pluck(1), sum, of, prepend('Others'), of)) 是完全可以接受的,但我发现生成的函数有点难以阅读。

请注意,该函数将 return 类似于 [['Other', 7]],这是一种无意义的格式,事实上我们将 concat 它与顶部的数组四。所以至少有一些关于删除最后的 of 并将 concat 替换为 flip(append) 的争论。我没有这样做,因为除了在此管道的上下文中,辅助函数没有任何意义。但如果有人会选择其他方式,我会理解。

我喜欢这个函数的其余部分,它似乎很适合 Ramda 管道风格。但是辅助函数在某种程度上破坏了它。我很想听听有关简化它的建议。

更新

然后来自 customcommander 的回答展示了我可以采取的简化,通过使用 reduceBy 而不是上述方法中的 groupBy -> map(pluck) -> map(sum) 舞蹈。这带来了明显的改善。

const combineAllBut = (n) => pipe(drop(n), pluck(1), sum, of, prepend('Others'), of)

const transform = pipe(
  reduceBy((a, {value}) => a + value, 0, prop('country')),
  toPairs,
  sort(descend(nth(1))),
  lift(concat)(take(4), combineAllBut(4)),
  map(zipObj(['country', 'value']))
)

const countries = [{ country: 'France', value: 100 }, { country: 'France', value: 100 }, { country: 'Romania', value: 500 }, { country: 'England', value: 400 }, { country: 'England', value: 400 }, { country: 'Spain', value: 130 }, { country: 'Albania', value: 4 }, { country: 'Hungary', value: 3 }]

console.log(transform(countries))
<script src="https://bundle.run/ramda@0.26.1"></script>
<script>
const {pipe, reduceBy, prop, map, pluck, sum, of, prepend, toPairs, sort, descend, nth, lift, concat, take, drop, zipObj} = ramda
</script>

我会按国家/地区分组,将每个国家/地区组合并为一个对象,同时对值求和、排序、拆分为两个数组 [highest 4] 和 [others],将其他合并为一个对象,然后连接最高 4.

const { pipe, groupBy, prop, values, map, converge, merge, head, pluck, sum, objOf, sort, descend, splitAt, concat, last, of, assoc } = R

const sumProp = key => pipe(pluck(key), sum, objOf(key))

const combineProp = key => converge(merge, [head, sumProp(key)])

const getTop5 = pipe(
  groupBy(prop('country')),
  values, // convert to array of country arrays
  map(combineProp('value')), // merge each sub array to a single object
  sort(descend(prop('value'))), // sort descebdubg by the value property
  splitAt(4), // split to two arrays [4 highest][the rest]
  converge(concat, [ // combine the highest and the others object
    head,
    // combine the rest to the others object wrapped in an array
    pipe(last, combineProp('value'), assoc('country', 'others'), of)
  ])
)

const countries = [{ country: 'France', value: 100 }, { country: 'France', value: 100 }, { country: 'Romania', value: 500 }, { country: 'England', value: 400 }, { country: 'England', value: 400 }, { country: 'Spain', value: 130 }, { country: 'Albania', value: 4 }, { country: 'Hungary', value: 3 }]

const result = getTop5(countries)

console.log(result)
<script src="https://cdnjs.cloudflare.com/ajax/libs/ramda/0.26.1/ramda.js"></script>

我可能会这样做:

const aggregate = R.pipe(
  R.groupBy(R.prop('country')),
  R.toPairs,
  R.map(
    R.applySpec({ 
      country: R.head, 
      value: R.pipe(R.last, R.pluck('value'), R.sum),
    }),
  ),
  R.sort(R.descend(R.prop('value'))),
  R.splitAt(4),
  R.over(
    R.lensIndex(1), 
    R.applySpec({ 
      country: R.always('Others'), 
      value: R.pipe(R.pluck('value'), R.sum),
    }),
  ),
  R.unnest,
);

const data = [
  { country: 'France', value: 100 },
  { country: 'France', value: 100 },
  { country: 'Romania', value: 500 },
  { country: 'England', value: 400 },
  { country: 'England', value: 400 },
  { country: 'Spain', value: 130 },
  { country: 'Albania', value: 4 },
  { country: 'Hungary', value: 3 }
];

console.log('result', aggregate(data));
<script src="https://cdnjs.cloudflare.com/ajax/libs/ramda/0.26.1/ramda.js"></script>

这里有两个解决方案

我认为第二个更容易理解,尽管它更长

函数 "mergeAllWithKeyBy" 结合了 "R.mergeAll"、"R.mergeWithKey" 和 "R.groupBy" 的功能。

const mergeAllWithKeyBy = R.curry((mergeFn, keyFn, objs) =>
  R.values(R.reduceBy(R.mergeWithKey(mergeFn), {}, keyFn, objs)))

const addValue = (k, l, r) => 
  k === 'value' ? l + r : r

const getTop = 
  R.pipe(
    mergeAllWithKeyBy(addValue, R.prop('country')),
    R.sort(R.descend(R.prop('value'))),
    R.splitAt(4),
    R.adjust(-1, R.map(R.assoc('country', 'Others'))),
    R.unnest,
    mergeAllWithKeyBy(addValue, R.prop('country')),
  )
  
const data = [
  { country: 'France', value: 100 },
  { country: 'France', value: 100 },
  { country: 'Romania', value: 500 },
  { country: 'England', value: 400 },
  { country: 'England', value: 400 },
  { country: 'Spain', value: 130 },
  { country: 'Albania', value: 4 },
  { country: 'Hungary', value: 3 }
]

console.log(getTop(data))
<script src="//cdn.jsdelivr.net/npm/ramda@latest/dist/ramda.min.js"></script>

const getTop = (data) => {
  const getCountryValue =
    R.prop(R.__, R.reduceBy((y, x) => y + x.value, 0, R.prop('country'), data))
    
  const countries = 
    R.uniq(R.pluck('country', data))
  
  const [topCounties, bottomCountries] = 
    R.splitAt(4, R.sort(R.descend(getCountryValue), countries))
  
  const others = {
    country: 'Others', 
    value: R.sum(R.map(getCountryValue, bottomCountries))
  }
  
  const top =
    R.map(R.applySpec({country: R.identity, value: getCountryValue}), topCounties)
  
  return R.append(others, top)
}

const data = [
  { country: 'France', value: 100 },
  { country: 'France', value: 100 },
  { country: 'Romania', value: 500 },
  { country: 'England', value: 400 },
  { country: 'England', value: 400 },
  { country: 'Spain', value: 130 },
  { country: 'Albania', value: 4 },
  { country: 'Hungary', value: 3 }
]

console.log(getTop(data))
<script src="//cdn.jsdelivr.net/npm/ramda@latest/dist/ramda.min.js"></script>