基于多个条件创建一个新变量并在 R 中循环
Create a new variable based on multiple conditions and loop in R
我有一个数据集,其中包含 4 个变量:"ID"、"V"、"value" 和 "weight"。
ID V value weight
A 1 8723.286 0.12183436
A 0 8889.905 0.09787817
A 1 14984.370 1.00000000
B 1 8176.189 0.12183436
B 1 8342.808 0.09787817
B 1 14437.272 0.18412047
我想计算一个名为 "output" 的变量。计算 output 的逻辑如下:
对于每个 ID(下标 1、2、3 分别表示该 ID 的第 1、2、3 行):
如果 V1 = 1 且 V2 = 0,
那么 output = value1 * weight1 + value2 * (1 - weight1)。
如果 V1 = 1 且 V2 = 1,
那么 output = value1 * weight1 + [(value2 + value3) / 2] * (1 - weight1)。
结果会是这样的:
ID V value weight output
A 1 8723.286 0.12183436 8869.605081
A 0 8889.905 0.09787817 8869.605081
A 1 14984.37 1.00000000 8869.605081
B 1 8176.189 0.12183436 10998.48252
B 1 8342.808 0.09787817 10998.48252
B 1 14437.272 0.18412047 10998.48252
我试过这样:
# Example data: two IDs ("A" and "B") with three rows each.
dat <- data.frame(
  ID = rep(c("A", "B"), each = 3),
  V = c(1, 0, 1, 1, 1, 1),
  value = c(8723.286, 8889.905, 14984.37, 8176.189, 8342.808, 14437.272),
  weight = c(0.12183436, 0.09787817, 1.00000000, 0.12183436, 0.09787817,
             0.18412047)
)

# One data frame per ID.
dats <- split(dat, dat$ID)

# Each decision below is a single TRUE/FALSE test, so it uses if/else with the
# scalar operator && rather than the vectorised ifelse() and &.

# First group: rule "V in row 1 is 1 and V in row 2 is 0".
# output = value1 * weight1 + value2 * (1 - weight1), otherwise NA.
grp_a <- dats[[1]]
out_a <- if (grp_a$V[1] == 1 && grp_a$V[2] == 0) {
  grp_a$weight[1] * grp_a$value[1] + (1 - grp_a$weight[1]) * grp_a$value[2]
} else {
  NA
}
out_a

# Second group: rule "V in row 1 is 1 and V in row 2 is 1".
# output = value1 * weight1 + mean(value2, value3) * (1 - weight1), otherwise NA.
grp_b <- dats[[2]]
out_b <- if (grp_b$V[1] == 1 && grp_b$V[2] == 1) {
  grp_b$weight[1] * grp_b$value[1] +
    (1 - grp_b$weight[1]) * ((grp_b$value[2] + grp_b$value[3]) / 2)
} else {
  NA
}
out_b
我如何使用循环执行此操作?谢谢你。
我的建议(可能还有更巧妙的方法),基于您的 data.frame,说明已写在代码注释中:
# Build the example data: IDs "A" and "B" from the question plus an extra
# ID "C" that matches neither rule and therefore yields NA.
dat <- data.frame(
  ID = rep(c("A", "B", "C"), each = 3),
  V = c(1, 0, 1, 1, 1, 1, 0, 0, 1),
  value = c(8723.286, 8889.905, 14984.37, 8176.189, 8342.808, 14437.272,
            10, 20, 30),
  weight = c(0.12183436, 0.09787817, 1.00000000, 0.12183436, 0.09787817,
             0.18412047, 0.1, 0.2, 0.3)
)

# Print with 10 significant digits so the numbers match the sample output.
options(digits = 10)

# Create the result column up front. Assigning into a column that does not
# exist yet only works when the first group's size divides nrow(dat), because
# the data.frame assignment recycles the short vector; NA is also the fallback
# for every ID that matches neither rule.
dat$output <- NA_real_

# The distinct ID values, in order of first appearance.
IDs <- unique(dat$ID)

for (i in IDs) {
  # Row numbers belonging to this ID.
  idx <- which(dat$ID == i)
  v <- dat$V[idx]
  value <- dat$value[idx]
  # Only the weight of the ID's first row enters either formula.
  w1 <- dat$weight[idx[1]]

  # isTRUE() turns an NA test (missing V, or an ID with a single row) into
  # "rule does not apply" instead of an error inside if().
  if (isTRUE(v[1] == 1 && v[2] == 0)) {
    # Rule 1: weighted mix of the first and second value.
    dat$output[idx] <- value[1] * w1 + value[2] * (1 - w1)
  } else if (isTRUE(v[1] == 1 && v[2] == 1)) {
    # Rule 2: weighted mix of the first value and the mean of values 2 and 3.
    dat$output[idx] <- value[1] * w1 + (value[2] + value[3]) / 2 * (1 - w1)
  }
}

dat
ID V value weight output
1 A 1 8723.286 0.12183436 8869.605081
2 A 0 8889.905 0.09787817 8869.605081
3 A 1 14984.370 1.00000000 8869.605081
4 B 1 8176.189 0.12183436 10998.482520
5 B 1 8342.808 0.09787817 10998.482520
6 B 1 14437.272 0.18412047 10998.482520
7 C 0 10.000 0.10000000 NA
8 C 0 20.000 0.20000000 NA
9 C 1 30.000 0.30000000 NA
我有一个数据集,其中包含 4 个变量:"ID"、"V"、"value" 和 "weight"。
ID V value weight
A 1 8723.286 0.12183436
A 0 8889.905 0.09787817
A 1 14984.370 1.00000000
B 1 8176.189 0.12183436
B 1 8342.808 0.09787817
B 1 14437.272 0.18412047
我想计算一个名为 "output" 的变量。计算 output 的逻辑如下:
对于每个 ID(下标 1、2、3 分别表示该 ID 的第 1、2、3 行):
如果 V1 = 1 且 V2 = 0,
那么 output = value1 * weight1 + value2 * (1 - weight1)。
如果 V1 = 1 且 V2 = 1,
那么 output = value1 * weight1 + [(value2 + value3) / 2] * (1 - weight1)。
结果会是这样的:
ID V value weight output
A 1 8723.286 0.12183436 8869.605081
A 0 8889.905 0.09787817 8869.605081
A 1 14984.37 1.00000000 8869.605081
B 1 8176.189 0.12183436 10998.48252
B 1 8342.808 0.09787817 10998.48252
B 1 14437.272 0.18412047 10998.48252
我试过这样:
# Example data: two IDs ("A" and "B") with three rows each.
dat <- data.frame(
  ID = rep(c("A", "B"), each = 3),
  V = c(1, 0, 1, 1, 1, 1),
  value = c(8723.286, 8889.905, 14984.37, 8176.189, 8342.808, 14437.272),
  weight = c(0.12183436, 0.09787817, 1.00000000, 0.12183436, 0.09787817,
             0.18412047)
)

# One data frame per ID.
dats <- split(dat, dat$ID)

# Each decision below is a single TRUE/FALSE test, so it uses if/else with the
# scalar operator && rather than the vectorised ifelse() and &.

# First group: rule "V in row 1 is 1 and V in row 2 is 0".
# output = value1 * weight1 + value2 * (1 - weight1), otherwise NA.
grp_a <- dats[[1]]
out_a <- if (grp_a$V[1] == 1 && grp_a$V[2] == 0) {
  grp_a$weight[1] * grp_a$value[1] + (1 - grp_a$weight[1]) * grp_a$value[2]
} else {
  NA
}
out_a

# Second group: rule "V in row 1 is 1 and V in row 2 is 1".
# output = value1 * weight1 + mean(value2, value3) * (1 - weight1), otherwise NA.
grp_b <- dats[[2]]
out_b <- if (grp_b$V[1] == 1 && grp_b$V[2] == 1) {
  grp_b$weight[1] * grp_b$value[1] +
    (1 - grp_b$weight[1]) * ((grp_b$value[2] + grp_b$value[3]) / 2)
} else {
  NA
}
out_b
我如何使用循环执行此操作?谢谢你。
我的建议(可能还有更巧妙的方法),基于您的 data.frame,说明已写在代码注释中:
# create your data.frame plus a record producing NA
# Build the example data: IDs "A" and "B" from the question plus an extra
# ID "C" that matches neither rule and therefore yields NA.
dat <- data.frame(
  ID = rep(c("A", "B", "C"), each = 3),
  V = c(1, 0, 1, 1, 1, 1, 0, 0, 1),
  value = c(8723.286, 8889.905, 14984.37, 8176.189, 8342.808, 14437.272,
            10, 20, 30),
  weight = c(0.12183436, 0.09787817, 1.00000000, 0.12183436, 0.09787817,
             0.18412047, 0.1, 0.2, 0.3)
)

# Print with 10 significant digits so the numbers match the sample output.
options(digits = 10)

# Create the result column up front. Assigning into a column that does not
# exist yet only works when the first group's size divides nrow(dat), because
# the data.frame assignment recycles the short vector; NA is also the fallback
# for every ID that matches neither rule.
dat$output <- NA_real_

# The distinct ID values, in order of first appearance.
IDs <- unique(dat$ID)

for (i in IDs) {
  # Row numbers belonging to this ID.
  idx <- which(dat$ID == i)
  v <- dat$V[idx]
  value <- dat$value[idx]
  # Only the weight of the ID's first row enters either formula.
  w1 <- dat$weight[idx[1]]

  # isTRUE() turns an NA test (missing V, or an ID with a single row) into
  # "rule does not apply" instead of an error inside if().
  if (isTRUE(v[1] == 1 && v[2] == 0)) {
    # Rule 1: weighted mix of the first and second value.
    dat$output[idx] <- value[1] * w1 + value[2] * (1 - w1)
  } else if (isTRUE(v[1] == 1 && v[2] == 1)) {
    # Rule 2: weighted mix of the first value and the mean of values 2 and 3.
    dat$output[idx] <- value[1] * w1 + (value[2] + value[3]) / 2 * (1 - w1)
  }
}

dat
ID V value weight output
1 A 1 8723.286 0.12183436 8869.605081
2 A 0 8889.905 0.09787817 8869.605081
3 A 1 14984.370 1.00000000 8869.605081
4 B 1 8176.189 0.12183436 10998.482520
5 B 1 8342.808 0.09787817 10998.482520
6 B 1 14437.272 0.18412047 10998.482520
7 C 0 10.000 0.10000000 NA
8 C 0 20.000 0.20000000 NA
9 C 1 30.000 0.30000000 NA