不寻常的数据分组/转换
Unusual data grouping / transformation
我正在努力解决一个算法问题,如何转换或分组数据以获得指定的输出。
我的输入是一堆按给定顺序排列的消息(从最新到最旧):
[
{
"id":5,
"created_at":"2021-01-01 00:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":true,
"meta_data":{
}
},
{
"id":4,
"created_at":"2021-01-01 01:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":true,
"meta_data":{
}
},
{
"id":3,
"created_at":"2021-01-01 03:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":false,
"meta_data":{
}
},
{
"id":2,
"created_at":"2021-01-01 04:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":false,
"meta_data":{
}
},
{
"id":1,
"created_at":"2021-01-01 05:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":true,
"meta_data":{
}
},
{
"id":0,
"created_at":"2021-01-01 06:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":false,
"meta_data":{
}
}
]
转换后的期望输出:
[
{
"is_author":true,
"messages":[
{
"id":5,
"created_at":"2021-01-01 00:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
},
{
"id":4,
"created_at":"2021-01-01 01:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
}
]
},
{
"is_author":false,
"messages":[
{
"id":3,
"created_at":"2021-01-01 03:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
},
{
"id":2,
"created_at":"2021-01-01 04:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
}
]
},
{
"is_author":true,
"messages":[
{
"id":1,
"created_at":"2021-01-01 05:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
}
]
},
{
"is_author":false,
"messages":[
{
"id":0,
"created_at":"2021-01-01 06:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
}
]
}
]
如您所见,每当 is_author 参数的值发生变化时,就会创建一个新组,用来收集来自该作者的连续消息。
在JS或PHP中是否有任何有效的解决方案来转换此类数据?
// Sample messages, ordered from newest to oldest.
const data=[{"id":5,"created_at":"2021-01-01 00:00:00","message":"Lorem ipsum dolor sit amet...","is_author":true,"meta_data":{}},{"id":4,"created_at":"2021-01-01 01:00:00","message":"Lorem ipsum dolor sit amet...","is_author":true,"meta_data":{}},{"id":3,"created_at":"2021-01-01 03:00:00","message":"Lorem ipsum dolor sit amet...","is_author":false,"meta_data":{}},{"id":2,"created_at":"2021-01-01 04:00:00","message":"Lorem ipsum dolor sit amet...","is_author":false,"meta_data":{}},{"id":1,"created_at":"2021-01-01 05:00:00","message":"Lorem ipsum dolor sit amet...","is_author":true,"meta_data":{}},{"id":0,"created_at":"2021-01-01 06:00:00","message":"Lorem ipsum dolor sit amet...","is_author":false,"meta_data":{}}];

// Group consecutive messages that share the same `is_author` flag into
// `{ is_author, messages }` objects. `is_author` is lifted onto the group and
// removed from each individual message.
const result = data.reduce((groups, { is_author, ...message }) => {
  const lastGroup = groups[groups.length - 1];
  if (lastGroup !== undefined && lastGroup.is_author === is_author) {
    // Same author as the previous message: extend the current group in place.
    // The `messages` array was created by this reducer, so mutating it is safe
    // and avoids re-copying the whole group for every appended message.
    lastGroup.messages.push(message);
  } else {
    // First message, or the author flag changed: start a new group.
    groups.push({ is_author, messages: [message] });
  }
  return groups;
}, []);

console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0 }
适当做一点抽象是很有好处的。在这里,我们希望在两个连续元素之间发生变化时拆分数组。这是基本的分组操作;这样考虑的话,我们就可以在分组的基础上,再把每一组收集成单个 {is_author, messages} 对象。
因此,我们可以编写一个通用函数:每当某个作用于前一项和当前项的函数返回 true 时,就在该处把数据拆分为子数组。然后我们的主函数调用它,传入一个测试 is_author 属性是否不同的函数,并在它返回之后重新格式化生成的组。它可能看起来像这样:
/**
 * Builds a splitter that partitions an array into runs of consecutive
 * elements, starting a new run whenever `pred(previous, current)` is truthy.
 *
 * The input array is not modified; the returned groups are fresh arrays.
 *
 * @param {function(*, *): boolean} pred - Called with each adjacent pair
 *   (previous element, current element); a truthy result starts a new group
 *   at the current element.
 * @returns {function(Array): Array<Array>} Function that maps an array to its
 *   list of groups. An empty array yields an empty list.
 */
const splitWhenever = (pred) => (xs) => {
  if (xs.length === 0) {
    return [];
  }
  // Single linear pass: extend the last group in place instead of rebuilding
  // the whole accumulator (and the last group) on every element.
  const groups = [[xs[0]]];
  for (let i = 1; i < xs.length; i += 1) {
    if (pred(xs[i - 1], xs[i])) {
      groups.push([xs[i]]);
    } else {
      groups[groups.length - 1].push(xs[i]);
    }
  }
  return groups;
};
/**
 * Groups consecutive messages that share the same `is_author` flag.
 *
 * Relies on `splitWhenever` (defined earlier in this snippet) to cut the list
 * wherever the flag changes between neighbours, then reshapes every run into
 * one `{ is_author, messages }` object. `is_author` is removed from the
 * individual messages because it now lives on the group.
 *
 * @param {Array<Object>} input - Messages, each carrying an `is_author` flag.
 * @returns {Array<{is_author: *, messages: Array<Object>}>} One entry per run
 *   of adjacent messages with an identical `is_author` value.
 */
const transform = (input) => {
  // Strict comparison: values such as 1 and true must not be treated as the
  // same author flag.
  const authorChanged = (prev, curr) => prev.is_author !== curr.is_author;
  return splitWhenever(authorChanged)(input).map((run) => ({
    is_author: run[0].is_author,
    messages: run.map(({ is_author, ...message }) => message),
  }));
};

// Demo: the sample messages from the question, newest first.
const input = [{id: 5, created_at: "2021-01-01 00:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 4, created_at: "2021-01-01 01:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 3, created_at: "2021-01-01 03:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 2, created_at: "2021-01-01 04:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 1, created_at: "2021-01-01 05:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 0, created_at: "2021-01-01 06:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}];

console.log(transform(input));
.as-console-wrapper {max-height: 100% !important; top: 0}
这比 navnath 的答案更复杂。但它建立在 splitWhenever 的基础上,而 splitWhenever 现在可以在此程序和其他程序中重复使用。
在 navnath 的回答下,OP 问道:“如果我已经处理了一组数据,而应用程序又加载了新数据 [...],是否可以把 id 为 6 的对象中的消息加入现有的组?”我在那里评论了一个建议的更改,只需对 navnath 的代码做微小的调整。但是重读之后,我认为这些新数据应该排在现有数据之前。(因为现有的 id 是降序排列的,而新数据的 id 比它们都大。)这意味着我的建议可能行不通。
这里需要做更多的工作,因为我们已经将分组与重新格式化分开了。这个版本仍然使用相同的通用 splitWhenever:首先将现有结构展平为原始格式,添加新数据,然后重新运行整个过程。这听起来很浪费。也许确实如此。更好的方法可能是简单地保留原始列表,把新数据添加到它的前面,然后重新运行上面的方法。但由于我们的转换是可逆的,因此如果需要,下面的做法也行得通:
/**
 * Builds a splitter that partitions an array into runs of consecutive
 * elements, starting a new run whenever `fn(previous, current)` is truthy.
 *
 * The input array is not modified; the returned groups are fresh arrays.
 *
 * @param {function(*, *): boolean} fn - Called with each adjacent pair
 *   (previous element, current element); a truthy result starts a new group
 *   at the current element.
 * @returns {function(Array): Array<Array>} Function that maps an array to its
 *   list of groups. An empty array yields an empty list.
 */
const splitWhenever = (fn) => (xs) => {
  if (xs.length === 0) {
    return [];
  }
  // Single linear pass: extend the last group in place instead of rebuilding
  // the whole accumulator (and the last group) on every element.
  const groups = [[xs[0]]];
  for (let i = 1; i < xs.length; i += 1) {
    if (fn(xs[i - 1], xs[i])) {
      groups.push([xs[i]]);
    } else {
      groups[groups.length - 1].push(xs[i]);
    }
  }
  return groups;
};
/**
 * Groups consecutive messages by `is_author`, optionally merging new messages
 * in front of a previously grouped result.
 *
 * The earlier result is first flattened back into plain messages (re-attaching
 * each group's `is_author` flag), the new messages are placed before them, and
 * the combined list is regrouped from scratch using `splitWhenever` (defined
 * earlier in this snippet). Neither `input` nor `old` is modified.
 *
 * @param {Array<Object>} input - New messages, each carrying `is_author`.
 * @param {Array<{is_author: *, messages: Array<Object>}>} [old=[]] - A result
 *   previously returned by this function; its messages come after `input`.
 * @returns {Array<{is_author: *, messages: Array<Object>}>} One entry per run
 *   of adjacent messages with an identical `is_author` value.
 */
const transform = (input, old = []) => {
  // Undo the grouping of the earlier result so it can be regrouped together
  // with the new messages.
  const previousMessages = old.flatMap(({ is_author, messages }) =>
    messages.map((message) => ({ ...message, is_author })));

  // Strict comparison: values such as 1 and true must not be treated as the
  // same author flag.
  const authorChanged = (prev, curr) => prev.is_author !== curr.is_author;

  return splitWhenever(authorChanged)([...input, ...previousMessages]).map((run) => ({
    is_author: run[0].is_author,
    messages: run.map(({ is_author, ...message }) => message),
  }));
};

// Demo: group the original messages first.
const input = [{id: 5, created_at: "2021-01-01 00:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 4, created_at: "2021-01-01 01:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 3, created_at: "2021-01-01 03:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 2, created_at: "2021-01-01 04:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 1, created_at: "2021-01-01 05:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 0, created_at: "2021-01-01 06:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}];
const result = transform(input);
console.log('Original data');
console.log(result);

// Then merge newer messages in front of the existing grouped result.
const additional = [{id: 7, created_at: "2020-12-31 22:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 6, created_at: "2020-12-31 22:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}];
const result2 = transform(additional, result);
console.log('With additional results');
console.log(result2);
.as-console-wrapper {max-height: 100% !important; top: 0}
我正在努力解决一个算法问题,如何转换或分组数据以获得指定的输出。
我的输入是一堆按给定顺序排列的消息(从最新到最旧):
[
{
"id":5,
"created_at":"2021-01-01 00:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":true,
"meta_data":{
}
},
{
"id":4,
"created_at":"2021-01-01 01:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":true,
"meta_data":{
}
},
{
"id":3,
"created_at":"2021-01-01 03:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":false,
"meta_data":{
}
},
{
"id":2,
"created_at":"2021-01-01 04:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":false,
"meta_data":{
}
},
{
"id":1,
"created_at":"2021-01-01 05:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":true,
"meta_data":{
}
},
{
"id":0,
"created_at":"2021-01-01 06:00:00",
"message":"Lorem ipsum dolor sit amet...",
"is_author":false,
"meta_data":{
}
}
]
转换后的期望输出:
[
{
"is_author":true,
"messages":[
{
"id":5,
"created_at":"2021-01-01 00:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
},
{
"id":4,
"created_at":"2021-01-01 01:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
}
]
},
{
"is_author":false,
"messages":[
{
"id":3,
"created_at":"2021-01-01 03:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
},
{
"id":2,
"created_at":"2021-01-01 04:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
}
]
},
{
"is_author":true,
"messages":[
{
"id":1,
"created_at":"2021-01-01 05:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
}
]
},
{
"is_author":false,
"messages":[
{
"id":0,
"created_at":"2021-01-01 06:00:00",
"message":"Lorem ipsum dolor sit amet...",
"meta_data":{
}
}
]
}
]
如您所见,每当 is_author 参数的值发生变化时,就会创建一个新组,用来收集来自该作者的连续消息。
在JS或PHP中是否有任何有效的解决方案来转换此类数据?
// Sample messages, ordered from newest to oldest.
const data=[{"id":5,"created_at":"2021-01-01 00:00:00","message":"Lorem ipsum dolor sit amet...","is_author":true,"meta_data":{}},{"id":4,"created_at":"2021-01-01 01:00:00","message":"Lorem ipsum dolor sit amet...","is_author":true,"meta_data":{}},{"id":3,"created_at":"2021-01-01 03:00:00","message":"Lorem ipsum dolor sit amet...","is_author":false,"meta_data":{}},{"id":2,"created_at":"2021-01-01 04:00:00","message":"Lorem ipsum dolor sit amet...","is_author":false,"meta_data":{}},{"id":1,"created_at":"2021-01-01 05:00:00","message":"Lorem ipsum dolor sit amet...","is_author":true,"meta_data":{}},{"id":0,"created_at":"2021-01-01 06:00:00","message":"Lorem ipsum dolor sit amet...","is_author":false,"meta_data":{}}];

// Group consecutive messages that share the same `is_author` flag into
// `{ is_author, messages }` objects. `is_author` is lifted onto the group and
// removed from each individual message.
const result = data.reduce((groups, { is_author, ...message }) => {
  const lastGroup = groups[groups.length - 1];
  if (lastGroup !== undefined && lastGroup.is_author === is_author) {
    // Same author as the previous message: extend the current group in place.
    // The `messages` array was created by this reducer, so mutating it is safe
    // and avoids re-copying the whole group for every appended message.
    lastGroup.messages.push(message);
  } else {
    // First message, or the author flag changed: start a new group.
    groups.push({ is_author, messages: [message] });
  }
  return groups;
}, []);

console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0 }
适当做一点抽象是很有好处的。在这里,我们希望在两个连续元素之间发生变化时拆分数组。这是基本的分组操作;这样考虑的话,我们就可以在分组的基础上,再把每一组收集成单个 {is_author, messages} 对象。
因此,我们可以编写一个通用函数:每当某个作用于前一项和当前项的函数返回 true 时,就在该处把数据拆分为子数组。然后我们的主函数调用它,传入一个测试 is_author 属性是否不同的函数,并在它返回之后重新格式化生成的组。它可能看起来像这样:
/**
 * Builds a splitter that partitions an array into runs of consecutive
 * elements, starting a new run whenever `pred(previous, current)` is truthy.
 *
 * The input array is not modified; the returned groups are fresh arrays.
 *
 * @param {function(*, *): boolean} pred - Called with each adjacent pair
 *   (previous element, current element); a truthy result starts a new group
 *   at the current element.
 * @returns {function(Array): Array<Array>} Function that maps an array to its
 *   list of groups. An empty array yields an empty list.
 */
const splitWhenever = (pred) => (xs) => {
  if (xs.length === 0) {
    return [];
  }
  // Single linear pass: extend the last group in place instead of rebuilding
  // the whole accumulator (and the last group) on every element.
  const groups = [[xs[0]]];
  for (let i = 1; i < xs.length; i += 1) {
    if (pred(xs[i - 1], xs[i])) {
      groups.push([xs[i]]);
    } else {
      groups[groups.length - 1].push(xs[i]);
    }
  }
  return groups;
};
/**
 * Groups consecutive messages that share the same `is_author` flag.
 *
 * Relies on `splitWhenever` (defined earlier in this snippet) to cut the list
 * wherever the flag changes between neighbours, then reshapes every run into
 * one `{ is_author, messages }` object. `is_author` is removed from the
 * individual messages because it now lives on the group.
 *
 * @param {Array<Object>} input - Messages, each carrying an `is_author` flag.
 * @returns {Array<{is_author: *, messages: Array<Object>}>} One entry per run
 *   of adjacent messages with an identical `is_author` value.
 */
const transform = (input) => {
  // Strict comparison: values such as 1 and true must not be treated as the
  // same author flag.
  const authorChanged = (prev, curr) => prev.is_author !== curr.is_author;
  return splitWhenever(authorChanged)(input).map((run) => ({
    is_author: run[0].is_author,
    messages: run.map(({ is_author, ...message }) => message),
  }));
};

// Demo: the sample messages from the question, newest first.
const input = [{id: 5, created_at: "2021-01-01 00:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 4, created_at: "2021-01-01 01:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 3, created_at: "2021-01-01 03:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 2, created_at: "2021-01-01 04:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 1, created_at: "2021-01-01 05:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 0, created_at: "2021-01-01 06:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}];

console.log(transform(input));
.as-console-wrapper {max-height: 100% !important; top: 0}
这比 navnath 的答案更复杂。但它建立在 splitWhenever 的基础上,而 splitWhenever 现在可以在此程序和其他程序中重复使用。
在 navnath 的回答下,OP 问道:“如果我已经处理了一组数据,而应用程序又加载了新数据 [...],是否可以把 id 为 6 的对象中的消息加入现有的组?”我在那里评论了一个建议的更改,只需对 navnath 的代码做微小的调整。但是重读之后,我认为这些新数据应该排在现有数据之前。(因为现有的 id 是降序排列的,而新数据的 id 比它们都大。)这意味着我的建议可能行不通。
这里需要做更多的工作,因为我们已经将分组与重新格式化分开了。这个版本仍然使用相同的通用 splitWhenever:首先将现有结构展平为原始格式,添加新数据,然后重新运行整个过程。这听起来很浪费。也许确实如此。更好的方法可能是简单地保留原始列表,把新数据添加到它的前面,然后重新运行上面的方法。但由于我们的转换是可逆的,因此如果需要,下面的做法也行得通:
/**
 * Builds a splitter that partitions an array into runs of consecutive
 * elements, starting a new run whenever `fn(previous, current)` is truthy.
 *
 * The input array is not modified; the returned groups are fresh arrays.
 *
 * @param {function(*, *): boolean} fn - Called with each adjacent pair
 *   (previous element, current element); a truthy result starts a new group
 *   at the current element.
 * @returns {function(Array): Array<Array>} Function that maps an array to its
 *   list of groups. An empty array yields an empty list.
 */
const splitWhenever = (fn) => (xs) => {
  if (xs.length === 0) {
    return [];
  }
  // Single linear pass: extend the last group in place instead of rebuilding
  // the whole accumulator (and the last group) on every element.
  const groups = [[xs[0]]];
  for (let i = 1; i < xs.length; i += 1) {
    if (fn(xs[i - 1], xs[i])) {
      groups.push([xs[i]]);
    } else {
      groups[groups.length - 1].push(xs[i]);
    }
  }
  return groups;
};
/**
 * Groups consecutive messages by `is_author`, optionally merging new messages
 * in front of a previously grouped result.
 *
 * The earlier result is first flattened back into plain messages (re-attaching
 * each group's `is_author` flag), the new messages are placed before them, and
 * the combined list is regrouped from scratch using `splitWhenever` (defined
 * earlier in this snippet). Neither `input` nor `old` is modified.
 *
 * @param {Array<Object>} input - New messages, each carrying `is_author`.
 * @param {Array<{is_author: *, messages: Array<Object>}>} [old=[]] - A result
 *   previously returned by this function; its messages come after `input`.
 * @returns {Array<{is_author: *, messages: Array<Object>}>} One entry per run
 *   of adjacent messages with an identical `is_author` value.
 */
const transform = (input, old = []) => {
  // Undo the grouping of the earlier result so it can be regrouped together
  // with the new messages.
  const previousMessages = old.flatMap(({ is_author, messages }) =>
    messages.map((message) => ({ ...message, is_author })));

  // Strict comparison: values such as 1 and true must not be treated as the
  // same author flag.
  const authorChanged = (prev, curr) => prev.is_author !== curr.is_author;

  return splitWhenever(authorChanged)([...input, ...previousMessages]).map((run) => ({
    is_author: run[0].is_author,
    messages: run.map(({ is_author, ...message }) => message),
  }));
};

// Demo: group the original messages first.
const input = [{id: 5, created_at: "2021-01-01 00:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 4, created_at: "2021-01-01 01:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 3, created_at: "2021-01-01 03:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 2, created_at: "2021-01-01 04:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 1, created_at: "2021-01-01 05:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}, {id: 0, created_at: "2021-01-01 06:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}];
const result = transform(input);
console.log('Original data');
console.log(result);

// Then merge newer messages in front of the existing grouped result.
const additional = [{id: 7, created_at: "2020-12-31 22:00:00", message: "Lorem ipsum dolor sit amet...", is_author: false, meta_data: {}}, {id: 6, created_at: "2020-12-31 22:00:00", message: "Lorem ipsum dolor sit amet...", is_author: true, meta_data: {}}];
const result2 = transform(additional, result);
console.log('With additional results');
console.log(result2);
.as-console-wrapper {max-height: 100% !important; top: 0}