如何组合列的成员,将它们收集在数据框中并在 R 中给它们一个新名称?
How to combine members of a column, collect them in a data frame, and give them a new name in R?
我想基于我的旧数据框创建一个新的数据框并合并特定列的成员,同时我给它们一个新名称:
例如,这是我的旧数据框:
# Example input: 36 rows = 2 col1 levels x 3 IDs x 6 col2 letters.
# The repeating key columns are generated with rep(); only col3 is spelled out.
df <- data.frame(
  ID = rep(rep(c("x1", "x2", "x3"), each = 6), times = 2),
  col1 = rep(c("a1", "a2"), each = 18),
  col2 = rep(c("a", "b", "c", "d", "e", "f"), times = 6),
  col3 = c(
    2, 13, 1, 21, 0, 5,
    3, 0, 6, 4, 50, 0,
    0, 0, 0, 9, 5, 0,
    51, 3, 6, 0, 0, 9,
    89, 4, 29, 1, 4, 17,
    6, 16, 9, 1, 0, 0
  ),
  # Keep the key columns as character vectors on every R version.
  stringsAsFactors = FALSE
)
对于新数据框,我希望基于 col2 生成一个新列:把 col2 为 a、b 或 c 的行合并,命名为 abc.1;把 col2 为 d 或 e 的行合并,命名为 de.5;把 col2 为 f 的行命名为 f.10。new.col3 则是这些行在旧 col3 中的值之和(SUM)。
结果将是:
# Expected result: one row per (col1, merged col2 label) with the col3 totals.
df2 <- data.frame(
  col1 = rep(c("a1", "a2"), each = 3),
  new.col2 = rep(c("abc.1", "de.5", "f.10"), times = 2),
  new.col3 = c(25, 89, 5, 213, 6, 26),
  # Keep the label columns as character vectors on every R version.
  stringsAsFactors = FALSE
)
使用 case_when 创建分组,然后使用 summarise 按组折叠行,并按组计算 col3 的总和。
library(dplyr)

# Tag every row with the numeric suffix of its col2 bucket, then collapse each
# (col1, gp) group into a single row.
df %>%
  mutate(
    gp = case_when(
      col2 %in% c("a", "b", "c") ~ 1,
      col2 %in% c("d", "e") ~ 5,
      col2 == "f" ~ 10
    )
  ) %>%
  group_by(col1, gp) %>%
  summarise(
    # Glue the distinct col2 letters of the group together and append the
    # bucket number, e.g. "abc" + "." + 1 -> "abc.1".
    new.col2 = paste0(paste(unique(col2), collapse = ""), ".", unique(gp)),
    new.col3 = sum(col3)
  )
输出
# A tibble: 6 × 4
# Groups: col1 [2]
col1 gp new.col2 new.col3
<chr> <dbl> <chr> <dbl>
1 a1 1 abc.1 25
2 a1 5 de.5 89
3 a1 10 f.10 5
4 a2 1 abc.1 213
5 a2 5 de.5 6
6 a2 10 f.10 26
使用 data.table:
library(data.table)

# `df` is created as a plain data.frame; `:=`, fcase() and %chin% only work on
# a data.table, so convert it by reference before using data.table syntax.
setDT(df)

# Step 1: label each row with its merged col2 bucket (added to df by reference).
# Step 2: sum col3 within every (col1, new.col2) combination.
df[, new.col2 := fcase(
  col2 %chin% c('a', 'b', 'c'), 'abc.1',
  col2 %chin% c('d', 'e'), 'de.5',
  col2 == 'f', 'f.10'
)][, .(new.col3 = sum(col3)), by = .(col1, new.col2)]
输出
col1 new.col2 new.col3
1: a1 abc.1 25
2: a1 de.5 89
3: a1 f.10 5
4: a2 abc.1 213
5: a2 de.5 6
6: a2 f.10 26
我想基于我的旧数据框创建一个新的数据框并合并特定列的成员,同时我给它们一个新名称: 例如,这是我的旧数据框:
# Example input: 36 rows = 2 col1 levels x 3 IDs x 6 col2 letters.
# The repeating key columns are generated with rep(); only col3 is spelled out.
df <- data.frame(
  ID = rep(rep(c("x1", "x2", "x3"), each = 6), times = 2),
  col1 = rep(c("a1", "a2"), each = 18),
  col2 = rep(c("a", "b", "c", "d", "e", "f"), times = 6),
  col3 = c(
    2, 13, 1, 21, 0, 5,
    3, 0, 6, 4, 50, 0,
    0, 0, 0, 9, 5, 0,
    51, 3, 6, 0, 0, 9,
    89, 4, 29, 1, 4, 17,
    6, 16, 9, 1, 0, 0
  ),
  # Keep the key columns as character vectors on every R version.
  stringsAsFactors = FALSE
)
对于新数据框,我希望基于 col2 生成一个新列:把 col2 为 a、b 或 c 的行合并,命名为 abc.1;把 col2 为 d 或 e 的行合并,命名为 de.5;把 col2 为 f 的行命名为 f.10。new.col3 则是这些行在旧 col3 中的值之和(SUM)。
结果将是:
# Expected result: one row per (col1, merged col2 label) with the col3 totals.
df2 <- data.frame(
  col1 = rep(c("a1", "a2"), each = 3),
  new.col2 = rep(c("abc.1", "de.5", "f.10"), times = 2),
  new.col3 = c(25, 89, 5, 213, 6, 26),
  # Keep the label columns as character vectors on every R version.
  stringsAsFactors = FALSE
)
使用 case_when 创建分组,然后使用 summarise 按组折叠行,并按组计算 col3 的总和。
library(dplyr)

# Tag every row with the numeric suffix of its col2 bucket, then collapse each
# (col1, gp) group into a single row.
df %>%
  mutate(
    gp = case_when(
      col2 %in% c("a", "b", "c") ~ 1,
      col2 %in% c("d", "e") ~ 5,
      col2 == "f" ~ 10
    )
  ) %>%
  group_by(col1, gp) %>%
  summarise(
    # Glue the distinct col2 letters of the group together and append the
    # bucket number, e.g. "abc" + "." + 1 -> "abc.1".
    new.col2 = paste0(paste(unique(col2), collapse = ""), ".", unique(gp)),
    new.col3 = sum(col3)
  )
输出
# A tibble: 6 × 4
# Groups: col1 [2]
col1 gp new.col2 new.col3
<chr> <dbl> <chr> <dbl>
1 a1 1 abc.1 25
2 a1 5 de.5 89
3 a1 10 f.10 5
4 a2 1 abc.1 213
5 a2 5 de.5 6
6 a2 10 f.10 26
使用 data.table:
library(data.table)

# `df` is created as a plain data.frame; `:=`, fcase() and %chin% only work on
# a data.table, so convert it by reference before using data.table syntax.
setDT(df)

# Step 1: label each row with its merged col2 bucket (added to df by reference).
# Step 2: sum col3 within every (col1, new.col2) combination.
df[, new.col2 := fcase(
  col2 %chin% c('a', 'b', 'c'), 'abc.1',
  col2 %chin% c('d', 'e'), 'de.5',
  col2 == 'f', 'f.10'
)][, .(new.col3 = sum(col3)), by = .(col1, new.col2)]
输出
col1 new.col2 new.col3
1: a1 abc.1 25
2: a1 de.5 89
3: a1 f.10 5
4: a2 abc.1 213
5: a2 de.5 6
6: a2 f.10 26