如何组合一列的成员,然后计算 R 中的其他列?
How to combine member of one column and then count other columns in R?
我有一个数据框:
df <- structure(list(ID = c("x1", "x1", "x1", "x1", "x1", "x1", "x2", "x2", "x2", "x2", "x2", "x2", "x3", "x3", "x3", "x3", "x3", "x3", "x1", "x1", "x1", "x1", "x1", "x1", "x2", "x2", "x2", "x2", "x2", "x2", "x3", "x3", "x3", "x3", "x3", "x3"), col1=c("a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2"), col2 = c("a", "b", "c", "d", "e", "f", "a", "b", "c", "d", "e", "f","a", "b", "c", "d", "e", "f","a", "b", "c", "d", "e", "f", "a", "b", "c", "d", "e", "f","a", "b", "c", "d", "e", "f"), col3 = c(2,13,1,21,0,5,3,0,6,4,50,0,0,0,0,9,5,0,51,3,6,0,0,9,89,4,29,1,4,17,6,16,9,1,0,0)),
class = "data.frame", row.names = c(NA,-36L))
ID col1 col2 col3
x1 a1 a 2
x1 a1 b 13
x1 a1 c 1
x1 a1 d 21
x1 a1 e 0
x1 a1 f 5
x2 a1 a 3
x2 a1 b 0
x2 a1 c 6
x2 a1 d 4
x2 a1 e 50
x2 a1 f 0
x3 a1 a 0
x3 a1 b 0
x3 a1 c 0
x3 a1 d 9
x3 a1 e 5
x3 a1 f 0
x1 a2 a 51
x1 a2 b 3
x1 a2 c 6
x1 a2 d 0
x1 a2 e 0
x1 a2 f 9
x2 a2 a 89
x2 a2 b 4
x2 a2 c 29
x2 a2 d 1
x2 a2 e 4
x2 a2 f 17
x3 a2 a 6
x3 a2 b 16
x3 a2 c 9
x3 a2 d 1
x3 a2 e 0
x3 a2 f 0
我想计算“a”、“b”或“c”>0(大于零),然后是“d”或“e”>0,最后是“f”> 的唯一 ID 0。
然后在不同的列中分别获取所有 (abc)、(de) 和 (f) 的总和。
所以结果如下所示:
df2<- structure(list(col1=c("a1","a1","a1","a2","a2","a2"), col2 = c("abc", "de", "f", "abc", "de", "f"), count.ID = c(2,3,1,3,2,2), total=c(25,89,5,213,6,26)),
class = "data.frame", row.names = c(NA,-6L))
col1 col2 count.ID total
a1 abc 2 25
a1 de 3 89
a1 f 1 5
a2 abc 3 213
a2 de 2 6
a2 f 2 26
这在 R 中怎么可能?
谢谢
一个前提是创建一个分组变量框架,将旧的 col2
映射到新的组合 col2
,然后将其 merge
/join
映射到原始的数据.
dplyr
library(dplyr)
groups <- data.frame(col2=c("a","b","c","d","e","f"), col2b=c("abc","abc","abc","de","de","f"))
left_join(df, groups, by = "col2") %>%
group_by(col1, col2 = col2b) %>%
summarize(count.ID = length(unique(ID[col3 > 0])), total = sum(col3)) %>%
ungroup()
# # A tibble: 6 x 4
# col1 col2 count.ID total
# <chr> <chr> <int> <dbl>
# 1 a1 abc 2 25
# 2 a1 de 3 89
# 3 a1 f 1 5
# 4 a2 abc 3 213
# 5 a2 de 2 6
# 6 a2 f 2 26
我有一个数据框:
df <- structure(list(ID = c("x1", "x1", "x1", "x1", "x1", "x1", "x2", "x2", "x2", "x2", "x2", "x2", "x3", "x3", "x3", "x3", "x3", "x3", "x1", "x1", "x1", "x1", "x1", "x1", "x2", "x2", "x2", "x2", "x2", "x2", "x3", "x3", "x3", "x3", "x3", "x3"), col1=c("a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a1","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2","a2"), col2 = c("a", "b", "c", "d", "e", "f", "a", "b", "c", "d", "e", "f","a", "b", "c", "d", "e", "f","a", "b", "c", "d", "e", "f", "a", "b", "c", "d", "e", "f","a", "b", "c", "d", "e", "f"), col3 = c(2,13,1,21,0,5,3,0,6,4,50,0,0,0,0,9,5,0,51,3,6,0,0,9,89,4,29,1,4,17,6,16,9,1,0,0)),
class = "data.frame", row.names = c(NA,-36L))
ID col1 col2 col3
x1 a1 a 2
x1 a1 b 13
x1 a1 c 1
x1 a1 d 21
x1 a1 e 0
x1 a1 f 5
x2 a1 a 3
x2 a1 b 0
x2 a1 c 6
x2 a1 d 4
x2 a1 e 50
x2 a1 f 0
x3 a1 a 0
x3 a1 b 0
x3 a1 c 0
x3 a1 d 9
x3 a1 e 5
x3 a1 f 0
x1 a2 a 51
x1 a2 b 3
x1 a2 c 6
x1 a2 d 0
x1 a2 e 0
x1 a2 f 9
x2 a2 a 89
x2 a2 b 4
x2 a2 c 29
x2 a2 d 1
x2 a2 e 4
x2 a2 f 17
x3 a2 a 6
x3 a2 b 16
x3 a2 c 9
x3 a2 d 1
x3 a2 e 0
x3 a2 f 0
我想计算“a”、“b”或“c”>0(大于零),然后是“d”或“e”>0,最后是“f”> 的唯一 ID 0。 然后在不同的列中分别获取所有 (abc)、(de) 和 (f) 的总和。 所以结果如下所示:
df2<- structure(list(col1=c("a1","a1","a1","a2","a2","a2"), col2 = c("abc", "de", "f", "abc", "de", "f"), count.ID = c(2,3,1,3,2,2), total=c(25,89,5,213,6,26)),
class = "data.frame", row.names = c(NA,-6L))
col1 col2 count.ID total
a1 abc 2 25
a1 de 3 89
a1 f 1 5
a2 abc 3 213
a2 de 2 6
a2 f 2 26
这在 R 中怎么可能? 谢谢
一个前提是创建一个分组变量框架,将旧的 col2
映射到新的组合 col2
,然后将其 merge
/join
映射到原始的数据.
dplyr
library(dplyr)
groups <- data.frame(col2=c("a","b","c","d","e","f"), col2b=c("abc","abc","abc","de","de","f"))
left_join(df, groups, by = "col2") %>%
group_by(col1, col2 = col2b) %>%
summarize(count.ID = length(unique(ID[col3 > 0])), total = sum(col3)) %>%
ungroup()
# # A tibble: 6 x 4
# col1 col2 count.ID total
# <chr> <chr> <int> <dbl>
# 1 a1 abc 2 25
# 2 a1 de 3 89
# 3 a1 f 1 5
# 4 a2 abc 3 213
# 5 a2 de 2 6
# 6 a2 f 2 26