将数据框中的单元格聚合成一个序列
Aggregating cells in dataframe into a sequence
我正在尝试在 R 中转换此数据框:
# Long-format example data: 6 elements (A-F) in 3 groups, each measured at
# hours 0, 4, ..., 20 (6 readings per element, 36 rows in total).
tableDataSortedCols <- data.frame(
  # group 1 holds 2 elements (12 rows), group 2 holds 3 (18), group 3 holds 1 (6)
  groups = rep(c(1, 2, 3), times = c(12, 18, 6)),
  element = rep(c("A", "B", "C", "D", "E", "F"), each = 6),
  hours = rep(c(0, 4, 8, 12, 16, 20), times = 6),
  values = c(
    123.0, 124.0, 123.5, 125.0, 123.0, 123.0, # A
    223.0, 224.0, 223.5, 225.0, 223.0, 223.0, # B
    223.1, 223.1, 223.1, 223.5, 223.1, 223.2, # C
    233.1, 234.0, 233.5, 235.0, 233.0, 233.0, # D
    323.0, 324.0, 323.5, 325.0, 323.0, 323.0, # E
    523.0, 524.0, 523.5, 525.0, 523.0, 523.0  # F
  )
)
转换成如下形式:
groups element timeseries
1 1 A 123.0 124.0 123.5 125.0 123.0 123.0
2 B 223.0 224.0 223.5 225.0 223.0 223.0
3 2 C 223.1 223.1 223.1 223.5 223.1 223.2
4 D 233.1 234.0 233.5 235.0 233.0 233.0
5 E 323.0 324.0 323.5 325.0 323.0 323.0
6 3 F 523.0 524.0 523.5 525.0 523.0 523.0
也就是把 hours 和 values 的配对折叠/聚合成一个名为 timeseries 的序列,
其中 timeseries 序列中的每个元素依次对应一个 hours 值:
0h、4h、8h、12h、16h、20h。
这是我目前所做的:
#' Blank out repeated values of a grouping column.
#'
#' Within each group of rows sharing the same value of `variable`, the first
#' row (in data order) keeps the value, converted to character; every other
#' row of that group gets the empty string "".
#'
#' @param df A data frame.
#' @param variable Unquoted name of the column to collapse.
#' @return `df`, ungrouped, with `variable` replaced by a character column.
collapse_rows_df <- function(df, variable) {
  group_var <- enquo(variable)
  df %>%
    group_by(!!group_var) %>%
    # row_number() stays valid for empty input, whereas 1:n() yields c(1, 0)
    mutate(groupRow = row_number()) %>%
    ungroup() %>%
    mutate(
      !!quo_name(group_var) := ifelse(
        groupRow == 1, as.character(!!group_var), ""
      )
    ) %>%
    # the helper index is only needed to find the first row of each group
    select(-groupRow)
}
# Attempt so far: move `groups` to the front, drop duplicate rows, blank the
# repeated `groups` labels, then render the result with formattable().
tableOut <- tableDataSortedCols %>%
  group_by(groups) %>%
  select(groups, everything()) %>%
  distinct() %>%
  collapse_rows_df(groups) %>%
  formattable()
您能提出实现该目标的方法吗?
library(tidyverse)
#' Collapse hour/value pairs into one space-separated string.
#'
#' @param hours Numeric vector giving the time of each reading.
#' @param values Numeric vector of readings, parallel to `hours`.
#' @return A length-one character vector with `values` ordered by `hours`.
make_timeseries <- function(hours, values) {
  stopifnot(length(hours) == length(values))
  ordered_values <- values[order(hours)]
  # paste() alone prints 123.0 as "123"; nsmall keeps at least one decimal and
  # trim stops format() from padding entries to a common width.
  paste(format(ordered_values, nsmall = 1, trim = TRUE), collapse = " ")
}
# One row per (groups, element) pair, with its readings collapsed to a string.
tableDataSortedCols %>%
  group_by(groups, element) %>%
  summarise(timeseries = make_timeseries(hours, values)) %>%
  # summarise() only peels off the last grouping level; drop the rest too
  ungroup()
library(tidyverse)
# Widen to one column per hour, then glue the hour columns into `timeseries`.
tableDataSortedCols %>%
  # nsmall keeps the trailing ".0"; trim avoids width padding inside the string
  mutate(values = format(values, nsmall = 1, trim = TRUE)) %>%
  arrange(groups, element, hours) %>%
  # spread() needs no grouping, so no group_by() here; the result is ungrouped
  spread(hours, values) %>%
  # pick the hour columns by exclusion rather than the hard-coded positions 3:8
  unite(timeseries, -c(groups, element), sep = " ")
您可以使用 data.table 包中的 dcast()。
> dcast.data.table(data.table(tableDataSortedCols), ... ~ hours,
+ value.var=c("values"))
groups element 0 4 8 12 16 20
1: 1 A 123.0 124.0 123.5 125.0 123.0 123.0
2: 1 B 223.0 224.0 223.5 225.0 223.0 223.0
3: 2 C 223.1 223.1 223.1 223.5 223.1 223.2
4: 2 D 233.1 234.0 233.5 235.0 233.0 233.0
5: 2 E 323.0 324.0 323.5 325.0 323.0 323.0
6: 3 F 523.0 524.0 523.5 525.0 523.0 523.0
要合并这些列,您可以执行:
library(data.table)
# Reshape to wide format: one column per distinct `hours` value.
tableDataSortedCols.1 <- dcast.data.table(
  data.table(tableDataSortedCols),
  ... ~ hours,
  value.var = "values"
)
tableDataSortedCols.1 <- as.data.frame(tableDataSortedCols.1) # to get back a data frame.
# Pick columns by name instead of position so this keeps working when the
# number of distinct hours changes.
id_cols <- c("groups", "element")
hour_cols <- setdiff(names(tableDataSortedCols.1), id_cols)
# Format column by column and paste row-wise; this avoids apply(), which would
# first coerce the data frame to a matrix.
formatted <- lapply(
  tableDataSortedCols.1[hour_cols],
  function(x) sprintf("%.1f", x)
)
out <- data.frame(
  tableDataSortedCols.1[, id_cols],
  timeseries = do.call(paste, c(unname(formatted), sep = " "))
)
输出结果:
> out
groups element timeseries
1 1 A 123.0 124.0 123.5 125.0 123.0 123.0
2 1 B 223.0 224.0 223.5 225.0 223.0 223.0
3 2 C 223.1 223.1 223.1 223.5 223.1 223.2
4 2 D 233.1 234.0 233.5 235.0 233.0 233.0
5 2 E 323.0 324.0 323.5 325.0 323.0 323.0
6 3 F 523.0 524.0 523.5 525.0 523.0 523.0
我正在尝试在 R 中转换此数据框:
# Long-format example data: 6 elements (A-F) in 3 groups, each measured at
# hours 0, 4, ..., 20 (6 readings per element, 36 rows in total).
tableDataSortedCols <- data.frame(
  # group 1 holds 2 elements (12 rows), group 2 holds 3 (18), group 3 holds 1 (6)
  groups = rep(c(1, 2, 3), times = c(12, 18, 6)),
  element = rep(c("A", "B", "C", "D", "E", "F"), each = 6),
  hours = rep(c(0, 4, 8, 12, 16, 20), times = 6),
  values = c(
    123.0, 124.0, 123.5, 125.0, 123.0, 123.0, # A
    223.0, 224.0, 223.5, 225.0, 223.0, 223.0, # B
    223.1, 223.1, 223.1, 223.5, 223.1, 223.2, # C
    233.1, 234.0, 233.5, 235.0, 233.0, 233.0, # D
    323.0, 324.0, 323.5, 325.0, 323.0, 323.0, # E
    523.0, 524.0, 523.5, 525.0, 523.0, 523.0  # F
  )
)
转换成如下形式:
groups element timeseries
1 1 A 123.0 124.0 123.5 125.0 123.0 123.0
2 B 223.0 224.0 223.5 225.0 223.0 223.0
3 2 C 223.1 223.1 223.1 223.5 223.1 223.2
4 D 233.1 234.0 233.5 235.0 233.0 233.0
5 E 323.0 324.0 323.5 325.0 323.0 323.0
6 3 F 523.0 524.0 523.5 525.0 523.0 523.0
也就是把 hours 和 values 的配对折叠/聚合成一个名为 timeseries 的序列,
其中 timeseries 序列中的每个元素依次对应一个 hours 值:
0h、4h、8h、12h、16h、20h。
这是我目前所做的:
#' Blank out repeated values of a grouping column.
#'
#' Within each group of rows sharing the same value of `variable`, the first
#' row (in data order) keeps the value, converted to character; every other
#' row of that group gets the empty string "".
#'
#' @param df A data frame.
#' @param variable Unquoted name of the column to collapse.
#' @return `df`, ungrouped, with `variable` replaced by a character column.
collapse_rows_df <- function(df, variable) {
  group_var <- enquo(variable)
  df %>%
    group_by(!!group_var) %>%
    # row_number() stays valid for empty input, whereas 1:n() yields c(1, 0)
    mutate(groupRow = row_number()) %>%
    ungroup() %>%
    mutate(
      !!quo_name(group_var) := ifelse(
        groupRow == 1, as.character(!!group_var), ""
      )
    ) %>%
    # the helper index is only needed to find the first row of each group
    select(-groupRow)
}
# Attempt so far: move `groups` to the front, drop duplicate rows, blank the
# repeated `groups` labels, then render the result with formattable().
tableOut <- tableDataSortedCols %>%
  group_by(groups) %>%
  select(groups, everything()) %>%
  distinct() %>%
  collapse_rows_df(groups) %>%
  formattable()
您能提出实现该目标的方法吗?
library(tidyverse)
#' Collapse hour/value pairs into one space-separated string.
#'
#' @param hours Numeric vector giving the time of each reading.
#' @param values Numeric vector of readings, parallel to `hours`.
#' @return A length-one character vector with `values` ordered by `hours`.
make_timeseries <- function(hours, values) {
  stopifnot(length(hours) == length(values))
  ordered_values <- values[order(hours)]
  # paste() alone prints 123.0 as "123"; nsmall keeps at least one decimal and
  # trim stops format() from padding entries to a common width.
  paste(format(ordered_values, nsmall = 1, trim = TRUE), collapse = " ")
}
# One row per (groups, element) pair, with its readings collapsed to a string.
tableDataSortedCols %>%
  group_by(groups, element) %>%
  summarise(timeseries = make_timeseries(hours, values)) %>%
  # summarise() only peels off the last grouping level; drop the rest too
  ungroup()
library(tidyverse)
# Widen to one column per hour, then glue the hour columns into `timeseries`.
tableDataSortedCols %>%
  # nsmall keeps the trailing ".0"; trim avoids width padding inside the string
  mutate(values = format(values, nsmall = 1, trim = TRUE)) %>%
  arrange(groups, element, hours) %>%
  # spread() needs no grouping, so no group_by() here; the result is ungrouped
  spread(hours, values) %>%
  # pick the hour columns by exclusion rather than the hard-coded positions 3:8
  unite(timeseries, -c(groups, element), sep = " ")
您可以使用 data.table 包中的 dcast()。
> dcast.data.table(data.table(tableDataSortedCols), ... ~ hours,
+ value.var=c("values"))
groups element 0 4 8 12 16 20
1: 1 A 123.0 124.0 123.5 125.0 123.0 123.0
2: 1 B 223.0 224.0 223.5 225.0 223.0 223.0
3: 2 C 223.1 223.1 223.1 223.5 223.1 223.2
4: 2 D 233.1 234.0 233.5 235.0 233.0 233.0
5: 2 E 323.0 324.0 323.5 325.0 323.0 323.0
6: 3 F 523.0 524.0 523.5 525.0 523.0 523.0
要合并这些列,您可以执行:
library(data.table)
# Reshape to wide format: one column per distinct `hours` value.
tableDataSortedCols.1 <- dcast.data.table(
  data.table(tableDataSortedCols),
  ... ~ hours,
  value.var = "values"
)
tableDataSortedCols.1 <- as.data.frame(tableDataSortedCols.1) # to get back a data frame.
# Pick columns by name instead of position so this keeps working when the
# number of distinct hours changes.
id_cols <- c("groups", "element")
hour_cols <- setdiff(names(tableDataSortedCols.1), id_cols)
# Format column by column and paste row-wise; this avoids apply(), which would
# first coerce the data frame to a matrix.
formatted <- lapply(
  tableDataSortedCols.1[hour_cols],
  function(x) sprintf("%.1f", x)
)
out <- data.frame(
  tableDataSortedCols.1[, id_cols],
  timeseries = do.call(paste, c(unname(formatted), sep = " "))
)
输出结果:
> out
groups element timeseries
1 1 A 123.0 124.0 123.5 125.0 123.0 123.0
2 1 B 223.0 224.0 223.5 225.0 223.0 223.0
3 2 C 223.1 223.1 223.1 223.5 223.1 223.2
4 2 D 233.1 234.0 233.5 235.0 233.0 233.0
5 2 E 323.0 324.0 323.5 325.0 323.0 323.0
6 3 F 523.0 524.0 523.5 525.0 523.0 523.0