将数据框中的单元格聚合成一个序列

Aggregating cells in dataframe into a sequence

我正在尝试在 R 中转换此数据框:

# Sample data: six elements (A-F) spread over three groups, each measured at
# the same six time points (0h to 20h in 4h steps).
tableDataSortedCols <- data.frame(
    # A and B belong to group 1, C-E to group 2, F to group 3 (6 rows each)
    groups = rep(c(1, 2, 3), times = c(12, 18, 6)),
    element = rep(c("A", "B", "C", "D", "E", "F"), each = 6),
    hours = rep(c(0, 4, 8, 12, 16, 20), times = 6),
    values = c(
        123.0, 124.0, 123.5, 125.0, 123.0, 123.0,  # A
        223.0, 224.0, 223.5, 225.0, 223.0, 223.0,  # B
        223.1, 223.1, 223.1, 223.5, 223.1, 223.2,  # C
        233.1, 234.0, 233.5, 235.0, 233.0, 233.0,  # D
        323.0, 324.0, 323.5, 325.0, 323.0, 323.0,  # E
        523.0, 524.0, 523.5, 525.0, 523.0, 523.0   # F
    )
)

转换成如下形式:

  groups element   timeseries
1      1       A   123.0 124.0 123.5 125.0 123.0 123.0
2              B   223.0 224.0 223.5 225.0 223.0 223.0
3      2       C   223.1 223.1 223.1 223.5 223.1 223.2
4              D   233.1 234.0 233.5 235.0 233.0 233.0
5              E   323.0 324.0 323.5 325.0 323.0 323.0
6      3       F   523.0 524.0 523.5 525.0 523.0 523.0

也就是把 hours 和 values 这一对列折叠/聚合成一个名为 timeseries 的序列,其中 timeseries 序列中的每个元素依次对应 hours 的取值 0h、4h、8h、12h、16h、20h。

这是我目前所做的:

#' Show each group label only on the first row of its group
#'
#' Converts the grouping column to character and replaces every repeated
#' label with an empty string, so that a printed table shows the label once
#' per group.
#'
#' @param df A data frame or tibble. Any existing grouping is replaced. A
#'   column named `groupRow` is used as a temporary helper and removed again.
#' @param variable Unquoted name of the column in `df` that defines the groups.
#' @return `df`, ungrouped, with `variable` as a character column that is
#'   blank on all but the first row of each group.
collapse_rows_df <- function(df, variable) {
  group_var <- enquo(variable)

  df %>%
    group_by(!!group_var) %>%
    # row_number() rather than 1:n(): 1:n() evaluates to c(1, 0) when a group
    # has no rows, which no longer matches the group size.
    mutate(groupRow = row_number()) %>%
    # The label column is rewritten only after ungrouping, using the
    # within-group index computed above.
    ungroup() %>%
    mutate(!!quo_name(group_var) := ifelse(groupRow == 1, as.character(!!group_var), "")) %>%
    select(-groupRow)
}

# Read inside-out: group by `groups`, move that column to the front, drop
# duplicate rows, blank out repeated group labels, then render the table.
tableOut <- formattable(
  collapse_rows_df(
    distinct(
      select(
        group_by(tableDataSortedCols, groups),
        groups, everything()
      )
    ),
    groups
  )
)

您能提出实现该目标的方法吗?

library(tidyverse)
#' Collapse one series into a single space-separated string, ordered by hour
#'
#' @param hours Vector of time points; determines the output order.
#' @param values Vector of measurements, the same length as `hours`.
#' @return A length-one character vector with `values` sorted by `hours` and
#'   separated by single spaces (`""` for empty input).
make_timeseries <- function(hours, values) {
  stopifnot(length(hours) == length(values))

  ordered_values <- values[order(hours)]
  if (is.numeric(ordered_values)) {
    # paste() alone would print 123.0 as "123"; nsmall = 1 keeps at least one
    # decimal for doubles and trim = TRUE suppresses width padding.
    ordered_values <- format(ordered_values, nsmall = 1, trim = TRUE)
  }
  paste(ordered_values, collapse = " ")
}
# One row per (groups, element) pair, with that element's values collapsed
# into a single `timeseries` string.
tableDataSortedCols %>%
  group_by(groups, element) %>%
  summarise(timeseries = make_timeseries(hours, values)) %>%
  # By default summarise() removes only the last grouping level, so the result
  # would still be grouped by `groups`; drop the remaining grouping explicitly.
  ungroup()
library(tidyverse)

tableDataSortedCols %>%
  # format() is applied to the whole column so that every value is rendered
  # with the same number of decimals (123 becomes "123.0").
  mutate(values = format(values)) %>%
  arrange(groups, element, hours) %>%
  # pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps
  # the hour columns in sorted order regardless of row order.
  pivot_wider(names_from = hours, values_from = values, names_sort = TRUE) %>%
  # Unite every hour column, however many there are, instead of relying on
  # the fixed column positions 3:8.
  unite(timeseries, -c(groups, element), sep = " ")

您可以使用 data.table 包中的 dcast():

> dcast.data.table(data.table(tableDataSortedCols), ... ~ hours, 
+                  value.var=c("values"))
   groups element     0     4     8    12    16    20
1:      1       A 123.0 124.0 123.5 125.0 123.0 123.0
2:      1       B 223.0 224.0 223.5 225.0 223.0 223.0
3:      2       C 223.1 223.1 223.1 223.5 223.1 223.2
4:      2       D 233.1 234.0 233.5 235.0 233.0 233.0
5:      2       E 323.0 324.0 323.5 325.0 323.0 323.0
6:      3       F 523.0 524.0 523.5 525.0 523.0 523.0

要把这些列合并成一列,您可以这样做:

library(data.table)
# Reshape to wide form: one row per (groups, element), one column per
# distinct value of `hours`.
tableDataSortedCols.1 <- dcast.data.table(data.table(tableDataSortedCols),
                                          ... ~ hours, value.var = "values")

tableDataSortedCols.1 <- as.data.frame(tableDataSortedCols.1)  # to get back a data frame.

# Select columns by name instead of by position (1:2 / 3:8), so the code keeps
# working when the number of distinct hours changes.
id_cols <- c("groups", "element")
hour_cols <- setdiff(names(tableDataSortedCols.1), id_cols)

# Format each hour column to one decimal place, then paste the columns
# together element-wise; this avoids a row-wise apply() over a data frame.
formatted_cols <- lapply(tableDataSortedCols.1[hour_cols], sprintf, fmt = "%.1f")

out <- data.frame(tableDataSortedCols.1[, id_cols],
                  timeseries = do.call(paste, c(unname(formatted_cols), sep = " ")))

输出结果:

> out
  groups element                          timeseries
1      1       A 123.0 124.0 123.5 125.0 123.0 123.0
2      1       B 223.0 224.0 223.5 225.0 223.0 223.0
3      2       C 223.1 223.1 223.1 223.5 223.1 223.2
4      2       D 233.1 234.0 233.5 235.0 233.0 233.0
5      2       E 323.0 324.0 323.5 325.0 323.0 323.0
6      3       F 523.0 524.0 523.5 525.0 523.0 523.0