在 R 中合并具有相同字段值的多行
combine multiple rows with same field in R
我有一个数据集如下
# Example data: three parallel vectors of length 6.
V1 <- c(5, 5, 5, 45, 45, 77)
V2 <- c("low", "low", "medium", "low", "low", "high")
V3 <- c(10, 3, 6, 10, 3, 1)
# Assemble the columns in the order V1, V3, V2.
df <- data.frame(V1, V3, V2)
V1 V3 V2
5 10 low
5 3 low
5 6 medium
45 10 low
45 3 low
77 1 high
我希望它是
v1 low medium high
5 13 6 0
45 13 0 0
77 0 0 1
我试过 cast/melt 但收效甚微。
# Rebuild the example data as a data frame with columns V1, V2, V3.
V1 <- c(5, 5, 5, 45, 45, 77)
V2 <- c("low", "low", "medium", "low", "low", "high")
V3 <- c(10, 3, 6, 10, 3, 1)
df <- data.frame(V1, V2, V3)
# Give V2 an explicit level order: low, medium, high.
df$V2 <- factor(df$V2, levels = c("low", "medium", "high"))
library(tidyr)
library(dplyr)
# Sum V3 for every (V1, V2) pair, then turn the V2 values into columns.
# `.groups = "drop"` returns an ungrouped result, and `pivot_wider()`
# replaces the superseded `spread()`; `values_fill = 0` puts 0 in the cells
# of (V1, V2) combinations that never occur in the data.
df %>%
  group_by(V1, V2) %>%
  summarise(sum = sum(V3), .groups = "drop") %>%
  pivot_wider(names_from = V2, values_from = sum, values_fill = 0)
# Source: local data frame [3 x 4]
#
# V1 low medium high
# (dbl) (dbl) (dbl) (dbl)
# 1 5 13 6 0
# 2 45 13 0 0
# 3 77 0 0 1
因为你是在求和(sum)并转换为宽格式,我建议使用 base R 中的 xtabs:
# Build the data frame with data.frame() so V1 and V3 stay numeric.
df <- data.frame(V1 = V1, V3 = V3, V2 = V2)
# Cross-tabulate: sum V3 within each V1 x V2 cell.
xtabs(V3 ~ V1 + V2, data = df)
# V2
# V1 high low medium
# 5 0 13 6
# 45 0 13 0
# 77 1 0 0
或者,如果你关心列顺序,你可以试试:
# Wrap V2 in factor() with the desired level order so the columns are
# tabulated as low, medium, high.
xtabs(V3 ~ V1 + factor(V2, c("low", "medium", "high")), data = df)
使用 reshape2
正如 Frank 在评论中所回答的:
library(reshape2)
# Cast to wide format: one row per V1, one column per V2 value, each cell
# holding the sum of V3. `fun.aggregate` is spelled out in full rather than
# relying on partial matching of `fun`; `fill = 0` is used for empty cells.
dcast(df, V1 ~ V2, value.var = "V3", fun.aggregate = sum, fill = 0)
输出:
V1 high low medium
1 5 0 13 6
2 45 0 13 0
3 77 1 0 0
如果我们想保持列顺序:
# Same cast, but V2 is turned into a factor whose levels follow the order of
# first appearance in the data, so the columns are not sorted alphabetically.
dcast(
  df,
  V1 ~ factor(V2, levels = unique(V2)),
  value.var = "V3",
  fun.aggregate = sum
)
输出:
V1 low medium high
1 5 13 6 0
2 45 13 0 0
3 77 0 0 1
我有一个数据集如下
# Example data: three parallel vectors of length 6.
V1 <- c(5, 5, 5, 45, 45, 77)
V2 <- c("low", "low", "medium", "low", "low", "high")
V3 <- c(10, 3, 6, 10, 3, 1)
# Assemble the columns in the order V1, V3, V2.
df <- data.frame(V1, V3, V2)
V1 V3 V2
5 10 low
5 3 low
5 6 medium
45 10 low
45 3 low
77 1 high
我希望它是
v1 low medium high
5 13 6 0
45 13 0 0
77 0 0 1
我试过 cast/melt 但收效甚微。
# Rebuild the example data as a data frame with columns V1, V2, V3.
V1 <- c(5, 5, 5, 45, 45, 77)
V2 <- c("low", "low", "medium", "low", "low", "high")
V3 <- c(10, 3, 6, 10, 3, 1)
df <- data.frame(V1, V2, V3)
# Give V2 an explicit level order: low, medium, high.
df$V2 <- factor(df$V2, levels = c("low", "medium", "high"))
library(tidyr)
library(dplyr)
# Sum V3 for every (V1, V2) pair, then turn the V2 values into columns.
# `.groups = "drop"` returns an ungrouped result, and `pivot_wider()`
# replaces the superseded `spread()`; `values_fill = 0` puts 0 in the cells
# of (V1, V2) combinations that never occur in the data.
df %>%
  group_by(V1, V2) %>%
  summarise(sum = sum(V3), .groups = "drop") %>%
  pivot_wider(names_from = V2, values_from = sum, values_fill = 0)
# Source: local data frame [3 x 4]
#
# V1 low medium high
# (dbl) (dbl) (dbl) (dbl)
# 1 5 13 6 0
# 2 45 13 0 0
# 3 77 0 0 1
因为你是在求和(sum)并转换为宽格式,我建议使用 base R 中的 xtabs:
# Build the data frame with data.frame() so V1 and V3 stay numeric.
df <- data.frame(V1 = V1, V3 = V3, V2 = V2)
# Cross-tabulate: sum V3 within each V1 x V2 cell.
xtabs(V3 ~ V1 + V2, data = df)
# V2
# V1 high low medium
# 5 0 13 6
# 45 0 13 0
# 77 1 0 0
或者,如果你关心列顺序,你可以试试:
# Wrap V2 in factor() with the desired level order so the columns are
# tabulated as low, medium, high.
xtabs(V3 ~ V1 + factor(V2, c("low", "medium", "high")), data = df)
使用 reshape2
正如 Frank 在评论中所回答的:
library(reshape2)
# Cast to wide format: one row per V1, one column per V2 value, each cell
# holding the sum of V3. `fun.aggregate` is spelled out in full rather than
# relying on partial matching of `fun`; `fill = 0` is used for empty cells.
dcast(df, V1 ~ V2, value.var = "V3", fun.aggregate = sum, fill = 0)
输出:
V1 high low medium
1 5 0 13 6
2 45 0 13 0
3 77 1 0 0
如果我们想保持列顺序:
# Same cast, but V2 is turned into a factor whose levels follow the order of
# first appearance in the data, so the columns are not sorted alphabetically.
dcast(
  df,
  V1 ~ factor(V2, levels = unique(V2)),
  value.var = "V3",
  fun.aggregate = sum
)
输出:
V1 low medium high
1 5 13 6 0
2 45 13 0 0
3 77 0 0 1