R:按 "id" 分组对多个按名称指定的列进行计算,并用 rbind 将结果行添加到 df
R rbind rows to df with results calculated by group for multiple columns by name
如何按 "id" 分组,将一些算术函数应用于最后四列(按组),并将新行添加到包含结果的 df。这是一个包含 5 个样本 (id) 和 8 列的说明性示例:
d1 d2 id type treat v1_gm v2_pct v3_pct
1 info info 1 leaf NA 0.2 70 90
2 info info 1 flower A 0.5 80 80
3 info info 2 leaf NA 0.4 65 80
4 info info 2 flower A 0.1 90 90
5 info info 3 leaf NA 0.6 55 80
6 info info 3 stem A 0.3 80 30
7 info info 4 leaf NA 0.6 30 40
8 info info 4 flower A 0.7 75 75
9 info info 5 leaf/stem NA 0.8 80 75
可重现的例子:
# Reproducible example: 9 rows (5 sample ids) and 8 columns.
# The table is built in a single data.frame() call so every column gets its
# final type immediately, instead of first creating an all-NA logical matrix
# and overwriting it column by column.

# Group key for each row; kept as a standalone vector and reused below.
id <- c(1, 1, 2, 2, 3, 3, 4, 4, 5)

df <- data.frame(
  d1 = "info", # length-1 values are recycled to every row
  d2 = "info",
  id = id,
  type = c(
    "leaf", "flower", "leaf", "flower", "leaf", "stem",
    "leaf", "flower", "leaf/stem"
  ),
  treat = c(NA, "A", NA, "A", NA, "A", NA, "A", NA),
  v1_gm = c(0.2, 0.5, 0.4, 0.1, 0.6, 0.3, 0.6, 0.7, 0.8),
  v2_pct = c(70, 80, 65, 90, 55, 80, 30, 75, 80),
  v3_pct = c(90, 80, 80, 90, 80, 30, 40, 75, 75),
  # Keep text columns as character on R versions older than 4.0 as well.
  stringsAsFactors = FALSE
)
结果 table 应如下所示。第 3、6、9 和 13 行是包含结果的新行。新行可以附加在 table 的末尾,或者放入一个 tmp df 以便稍后使用 rbind 添加(无论哪种方式,我都不知道该怎么做)。分组变量是列 "id"。函数 sum 用于 "v1_gm"。函数 "mean" 用于多个连续的列,此处为 "v2_pct" 和 "v3_pct",应按名称调用(例如,v2_pct:v3_pct)。新行中 "type" 的值是由组内各行的 "type" 连接而成的,"d1" 和 "d2" 只是从 treat=="A" 的组行中复制而来,并且新行中的 "treat" 被赋值为 "cmb"。
d1 d2 id type treat v1_gm v2_pct v3_pct
1 info info 1 leaf NA 0.2 70 90
2 info info 1 flower A 0.5 80 80
3 info info 1 leaf/flower cmb 0.7 75 85
4 info info 2 leaf NA 0.4 65 80
5 info info 2 flower A 0.1 90 90
6 info info 2 leaf/flower cmb 0.5 77.5 85
7 info info 3 leaf NA 0.6 55 80
8 info info 3 stem A 0.3 80 30
9 info info 3 leaf/stem cmb 0.9 67.5 55
10 info info 4 leaf NA 0.6 30 40
11 info info 4 flower A 0.7 75 75
13 info info 4 leaf/flower cmb 1.3 52.5 57.5
14 info info 5 leaf/stem NA 0.8 80 75
我不确定您是否可以将组摘要作为一行添加到数据框中,但您应该可以把它作为一列来添加。
library("dplyr")

# Build one summary row per id, based on that id's treat == "A" row.
# The group statistics are written over the original columns, so the result
# has exactly the same column names as df and can be row-bound to it directly.
# Ids without a treat == "A" row produce no summary row.
res1 <- df %>%
  group_by(id) %>%
  mutate(
    v1_gm = sum(v1_gm),                 # group total
    v2_pct = mean(v2_pct),              # group means
    v3_pct = mean(v3_pct),
    type = paste(type, collapse = "/")  # e.g. "leaf/flower"
  ) %>%
  # Keep the treat == "A" row of each group; d1 and d2 are carried over
  # from it. Rows whose treat is NA are dropped by filter().
  filter(treat == "A") %>%
  ungroup() %>%
  # Label the new rows as the combined result.
  mutate(treat = "cmb") %>%
  select(d1, d2, id, type, treat, v1_gm, v2_pct, v3_pct)
这会给你答案然后使用bind_rows你会得到你想要的结果
# Stack the summary rows held in res1 underneath the original rows of df.
final_res <- df %>% bind_rows(res1)
通过对@Arun 的回答进行一些修改,以下脚本完全解决了这个问题。
library("dplyr")

# One summary row per id, derived from that id's treat == "A" row:
# v1_gm becomes the group sum, v2_pct / v3_pct the group means and type the
# "/"-joined types of the group. Ids without a treat == "A" row get no
# summary row.
res1 <- df %>%
  group_by(id) %>%
  mutate(
    v1_gm = sum(v1_gm),
    v2_pct = mean(v2_pct),
    v3_pct = mean(v3_pct),
    type = paste(type, collapse = "/")
  ) %>%
  filter(treat == "A") %>%
  # Drop the grouping so later verbs on res1 are not silently applied per id.
  ungroup() %>%
  mutate(treat = "calculated")

# Original rows followed by the summary rows.
final_res1 <- bind_rows(df, res1)

# Sort by id, then treat with NA first, so each summary row follows the rows
# it summarises. The sort is done while id is still numeric; sorting the
# character version would place "10" before "2".
final_res1 <- final_res1[
  order(final_res1$id, final_res1$treat, na.last = FALSE),
]

# id is stored as character in the final table.
final_res1$id <- as.character(final_res1$id)
如何按 "id" 分组,将一些算术函数应用于最后四列(按组),并将新行添加到包含结果的 df。这是一个包含 5 个样本 (id) 和 8 列的说明性示例:
d1 d2 id type treat v1_gm v2_pct v3_pct
1 info info 1 leaf NA 0.2 70 90
2 info info 1 flower A 0.5 80 80
3 info info 2 leaf NA 0.4 65 80
4 info info 2 flower A 0.1 90 90
5 info info 3 leaf NA 0.6 55 80
6 info info 3 stem A 0.3 80 30
7 info info 4 leaf NA 0.6 30 40
8 info info 4 flower A 0.7 75 75
9 info info 5 leaf/stem NA 0.8 80 75
可重现的例子:
# Reproducible example: 9 rows (5 sample ids) and 8 columns.
# The table is built in a single data.frame() call so every column gets its
# final type immediately, instead of first creating an all-NA logical matrix
# and overwriting it column by column.

# Group key for each row; kept as a standalone vector and reused below.
id <- c(1, 1, 2, 2, 3, 3, 4, 4, 5)

df <- data.frame(
  d1 = "info", # length-1 values are recycled to every row
  d2 = "info",
  id = id,
  type = c(
    "leaf", "flower", "leaf", "flower", "leaf", "stem",
    "leaf", "flower", "leaf/stem"
  ),
  treat = c(NA, "A", NA, "A", NA, "A", NA, "A", NA),
  v1_gm = c(0.2, 0.5, 0.4, 0.1, 0.6, 0.3, 0.6, 0.7, 0.8),
  v2_pct = c(70, 80, 65, 90, 55, 80, 30, 75, 80),
  v3_pct = c(90, 80, 80, 90, 80, 30, 40, 75, 75),
  # Keep text columns as character on R versions older than 4.0 as well.
  stringsAsFactors = FALSE
)
结果 table 应如下所示。第 3、6、9 和 13 行是包含结果的新行。新行可以附加在 table 的末尾,或者放入一个 tmp df 以便稍后使用 rbind 添加(无论哪种方式,我都不知道该怎么做)。分组变量是列 "id"。函数 sum 用于 "v1_gm"。函数 "mean" 用于多个连续的列,此处为 "v2_pct" 和 "v3_pct",应按名称调用(例如,v2_pct:v3_pct)。新行中 "type" 的值是由组内各行的 "type" 连接而成的,"d1" 和 "d2" 只是从 treat=="A" 的组行中复制而来,并且新行中的 "treat" 被赋值为 "cmb"。
d1 d2 id type treat v1_gm v2_pct v3_pct
1 info info 1 leaf NA 0.2 70 90
2 info info 1 flower A 0.5 80 80
3 info info 1 leaf/flower cmb 0.7 75 85
4 info info 2 leaf NA 0.4 65 80
5 info info 2 flower A 0.1 90 90
6 info info 2 leaf/flower cmb 0.5 77.5 85
7 info info 3 leaf NA 0.6 55 80
8 info info 3 stem A 0.3 80 30
9 info info 3 leaf/stem cmb 0.9 67.5 55
10 info info 4 leaf NA 0.6 30 40
11 info info 4 flower A 0.7 75 75
13 info info 4 leaf/flower cmb 1.3 52.5 57.5
14 info info 5 leaf/stem NA 0.8 80 75
我不确定您是否可以将组摘要作为一行添加到数据框中,但您应该可以把它作为一列来添加。
library("dplyr")

# Build one summary row per id, based on that id's treat == "A" row.
# The group statistics are written over the original columns, so the result
# has exactly the same column names as df and can be row-bound to it directly.
# Ids without a treat == "A" row produce no summary row.
res1 <- df %>%
  group_by(id) %>%
  mutate(
    v1_gm = sum(v1_gm),                 # group total
    v2_pct = mean(v2_pct),              # group means
    v3_pct = mean(v3_pct),
    type = paste(type, collapse = "/")  # e.g. "leaf/flower"
  ) %>%
  # Keep the treat == "A" row of each group; d1 and d2 are carried over
  # from it. Rows whose treat is NA are dropped by filter().
  filter(treat == "A") %>%
  ungroup() %>%
  # Label the new rows as the combined result.
  mutate(treat = "cmb") %>%
  select(d1, d2, id, type, treat, v1_gm, v2_pct, v3_pct)
这会给你答案然后使用bind_rows你会得到你想要的结果
# Stack the summary rows held in res1 underneath the original rows of df.
final_res <- df %>% bind_rows(res1)
通过对@Arun 的回答进行一些修改,以下脚本完全解决了这个问题。
library("dplyr")

# One summary row per id, derived from that id's treat == "A" row:
# v1_gm becomes the group sum, v2_pct / v3_pct the group means and type the
# "/"-joined types of the group. Ids without a treat == "A" row get no
# summary row.
res1 <- df %>%
  group_by(id) %>%
  mutate(
    v1_gm = sum(v1_gm),
    v2_pct = mean(v2_pct),
    v3_pct = mean(v3_pct),
    type = paste(type, collapse = "/")
  ) %>%
  filter(treat == "A") %>%
  # Drop the grouping so later verbs on res1 are not silently applied per id.
  ungroup() %>%
  mutate(treat = "calculated")

# Original rows followed by the summary rows.
final_res1 <- bind_rows(df, res1)

# Sort by id, then treat with NA first, so each summary row follows the rows
# it summarises. The sort is done while id is still numeric; sorting the
# character version would place "10" before "2".
final_res1 <- final_res1[
  order(final_res1$id, final_res1$treat, na.last = FALSE),
]

# id is stored as character in the final table.
final_res1$id <- as.character(final_res1$id)