R 脚本:如何用另一个数据集中的值替换数据集中的值
R script how to substitute values in dataset from other dataset
我在 R 中处理数据时遇到问题。我有这个数据集(示例)
# Example data in long format: one row per (GRP, x) pair with its value y.
# Note that group 4 only has an "x2" row.
df <- data.frame(
  GRP = c(1, 2, 3, 1, 2, 3, 4),
  x = rep(c("x1", "x2"), times = c(3, 4)),
  y = c(3, 8, 2, 20, 24, 28, 31)
)
GRP x y
1 x1 3
2 x1 8
3 x1 2
1 x2 20
2 x2 24
3 x2 28
4 x2 31
并想将此数据集转换为
GRP x1 x2
1 3 20
2 8 24
3 2 28
4 0 31
我试过了:
# Build a template with one row per distinct GRP and zero-filled x1/x2 columns.
df1 <- expand.grid(GRP = unique(df$GRP), x1=0, x2=0)
# Problem: the left-hand side selects all 4 rows of df1, but the right-hand
# side yields only the 3 y values of the "x1" rows in df, so R recycles them
# and the GRP 4 row receives 3 instead of staying 0. The 4-element mask built
# from df1 is also recycled against the 7-element mask built from df in `&`.
df1$x1[df1$GRP %in% df$GRP]<- df$y[df1$GRP %in% df$GRP &
df$x %in% c("x1")]
# The "x2" case happens to line up because df has an "x2" row for every GRP.
df1$x2[df1$GRP %in% df$GRP]<- df$y[df1$GRP %in% df$GRP & df$x %in%
c("x2")]
得到的结果是:
GRP x1 x2
1 3 20
2 8 24
3 2 28
4 3 31
有什么解决这个问题的建议吗?
如果您需要将数据从长格式(long)转换为宽格式(wide),以下是一些可选方法:
# Long -> wide with reshape2: one row per GRP, one column per value of x,
# cells taken from y; GRP/x combinations missing from df are filled with 0.
library(reshape2)
dcast(data = df, formula = GRP ~ x, value.var = "y", fill = 0)
# GRP x1 x2
#1 1 3 20
#2 2 8 24
#3 3 2 28
#4 4 0 31
或
# Base R alternative: contingency table of y by GRP (rows) and x (columns).
xtabs(formula = y ~ GRP + x, data = df)
或
# Long -> wide with tidyr. pivot_wider() is the current replacement for the
# superseded spread(): column names come from x, cell values from y, and
# GRP/x combinations absent from df are filled with 0.
library(tidyr)
pivot_wider(df, names_from = x, values_from = y, values_fill = list(y = 0))
此外,如果要用 'df' 中的值替换 'df1' 中的值,下面的代码可能会有所帮助:
# Zero-filled template: one row per distinct GRP.
df1 <- data.frame(GRP = unique(df$GRP), x1 = 0, x2 = 0)
# Logical masks over the rows of df: rows that carry an x1 (resp. x2) value
# and whose GRP also appears in df1.
indx1 <- df$GRP %in% df1$GRP & df$x == "x1"
indx2 <- df$GRP %in% df1$GRP & df$x == "x2"
# Overwrite the zeros for the groups that have a value. Values are assigned
# positionally, so this relies on df1 and df listing those groups in the
# same order.
df1$x1 <- replace(df1$x1, df1$GRP %in% df$GRP[indx1], df$y[indx1])
df1$x2 <- replace(df1$x2, df1$GRP %in% df$GRP[indx2], df$y[indx2])
df1
# GRP x1 x2
#1 1 3 20
#2 2 8 24
#3 3 2 28
#4 4 0 31
更新
假设 'df1' 中的行顺序与 'df' 不同:
# Same zero-filled template, but sorted by descending GRP so that df1 no
# longer follows the row order of df.
df1 <- data.frame(GRP = unique(df$GRP), x1 = 0, x2 = 0)
df1 <- df1[order(df1$GRP, decreasing = TRUE), ]
indx1 <- df$GRP %in% df1$GRP & df$x == "x1"
indx2 <- df$GRP %in% df1$GRP & df$x == "x2"
# match() gives, for each selected row of df, the position of its GRP in df1,
# so every value lands in the right row whatever the order of df1 is.
df1$x1 <- replace(df1$x1, match(df$GRP[indx1], df1$GRP), df$y[indx1])
df1$x2 <- replace(df1$x2, match(df$GRP[indx2], df1$GRP), df$y[indx2])
df1
# GRP x1 x2
#4 4 0 31
#3 3 2 28
#2 2 8 24
#1 1 3 20
它同样适用于其他行顺序。从原来的 'df1' 开始:
# Fix the RNG seed so the shuffle is reproducible, then permute df1's rows.
set.seed(28)
df1 <- df1[sample.int(nrow(df1)), ]
运行上面列出的代码之后,得到如下输出:
df1
# GRP x1 x2
#1 1 3 20
#4 4 0 31
#3 3 2 28
#2 2 8 24
我在 R 中处理数据时遇到问题。我有这个数据集(示例)
# Example data in long format: one row per (GRP, x) pair with its value y.
# Note that group 4 only has an "x2" row.
df <- data.frame(
  GRP = c(1, 2, 3, 1, 2, 3, 4),
  x = rep(c("x1", "x2"), times = c(3, 4)),
  y = c(3, 8, 2, 20, 24, 28, 31)
)
GRP x y
1 x1 3
2 x1 8
3 x1 2
1 x2 20
2 x2 24
3 x2 28
4 x2 31
并想将此数据集转换为
GRP x1 x2
1 3 20
2 8 24
3 2 28
4 0 31
我试过了:
# Build a template with one row per distinct GRP and zero-filled x1/x2 columns.
df1 <- expand.grid(GRP = unique(df$GRP), x1=0, x2=0)
# Problem: the left-hand side selects all 4 rows of df1, but the right-hand
# side yields only the 3 y values of the "x1" rows in df, so R recycles them
# and the GRP 4 row receives 3 instead of staying 0. The 4-element mask built
# from df1 is also recycled against the 7-element mask built from df in `&`.
df1$x1[df1$GRP %in% df$GRP]<- df$y[df1$GRP %in% df$GRP &
df$x %in% c("x1")]
# The "x2" case happens to line up because df has an "x2" row for every GRP.
df1$x2[df1$GRP %in% df$GRP]<- df$y[df1$GRP %in% df$GRP & df$x %in%
c("x2")]
得到的结果是:
GRP x1 x2
1 3 20
2 8 24
3 2 28
4 3 31
有什么解决这个问题的建议吗?
如果您需要将数据从长格式(long)转换为宽格式(wide),以下是一些可选方法:
# Long -> wide with reshape2: one row per GRP, one column per value of x,
# cells taken from y; GRP/x combinations missing from df are filled with 0.
library(reshape2)
dcast(data = df, formula = GRP ~ x, value.var = "y", fill = 0)
# GRP x1 x2
#1 1 3 20
#2 2 8 24
#3 3 2 28
#4 4 0 31
或
# Base R alternative: contingency table of y by GRP (rows) and x (columns).
xtabs(formula = y ~ GRP + x, data = df)
或
# Long -> wide with tidyr. pivot_wider() is the current replacement for the
# superseded spread(): column names come from x, cell values from y, and
# GRP/x combinations absent from df are filled with 0.
library(tidyr)
pivot_wider(df, names_from = x, values_from = y, values_fill = list(y = 0))
此外,如果要用 'df' 中的值替换 'df1' 中的值,下面的代码可能会有所帮助:
# Zero-filled template: one row per distinct GRP.
df1 <- data.frame(GRP = unique(df$GRP), x1 = 0, x2 = 0)
# Logical masks over the rows of df: rows that carry an x1 (resp. x2) value
# and whose GRP also appears in df1.
indx1 <- df$GRP %in% df1$GRP & df$x == "x1"
indx2 <- df$GRP %in% df1$GRP & df$x == "x2"
# Overwrite the zeros for the groups that have a value. Values are assigned
# positionally, so this relies on df1 and df listing those groups in the
# same order.
df1$x1 <- replace(df1$x1, df1$GRP %in% df$GRP[indx1], df$y[indx1])
df1$x2 <- replace(df1$x2, df1$GRP %in% df$GRP[indx2], df$y[indx2])
df1
# GRP x1 x2
#1 1 3 20
#2 2 8 24
#3 3 2 28
#4 4 0 31
更新
假设 'df1' 中的行顺序与 'df' 不同:
# Same zero-filled template, but sorted by descending GRP so that df1 no
# longer follows the row order of df.
df1 <- data.frame(GRP = unique(df$GRP), x1 = 0, x2 = 0)
df1 <- df1[order(df1$GRP, decreasing = TRUE), ]
indx1 <- df$GRP %in% df1$GRP & df$x == "x1"
indx2 <- df$GRP %in% df1$GRP & df$x == "x2"
# match() gives, for each selected row of df, the position of its GRP in df1,
# so every value lands in the right row whatever the order of df1 is.
df1$x1 <- replace(df1$x1, match(df$GRP[indx1], df1$GRP), df$y[indx1])
df1$x2 <- replace(df1$x2, match(df$GRP[indx2], df1$GRP), df$y[indx2])
df1
# GRP x1 x2
#4 4 0 31
#3 3 2 28
#2 2 8 24
#1 1 3 20
它同样适用于其他行顺序。从原来的 'df1' 开始:
set.seed(28)
# Randomly permute the rows of df1.
df1 <- df1[sample.int(nrow(df1)), ]
运行上面列出的代码之后,得到如下输出:
df1
# GRP x1 x2
#1 1 3 20
#4 4 0 31
#3 3 2 28
#2 2 8 24