case_when() 给出的具有不同多个条件的 R mutate()
R mutate() with varying multiple conditions given by case_when()
我正在尝试使用 mutate() 和 case_when() 在 R 中创建具有值的 table,因为 mutate() 的计算可能因 [= 的多个条件而异23=]()。但是 case_when() 的多个条件的数量也可能因我使用的模型而异。
下面是一个可重现的例子(来源:https://doseresponse.github.io/medrc/articles/medrc.html):
library(medrc)
library(dplyr)
library(tidyr)
data(spinach)
spinach$CURVE <- as.factor(spinach$CURVE)
#I can have a model using LL.4() or LL.3() function (as in 'fct' bellow)
sm1 <- metadrm(SLOPE ~ DOSE,
data=spinach,
fct=LL.3(),
ind=CURVE,
cid2=HERBICIDE,
struct="UN")
#or in other situations
sm1 <- metadrm(SLOPE ~ DOSE,
data=spinach,
fct=LL.4(),
ind=CURVE,
cid2=HERBICIDE,
struct="UN")
#Extracting the coefficients from the model
minor_coef_table <- as.data.frame(sm1$estimates$ind)
minor_coef_table <- cbind(minor_coef_table, sm1$estimates$coefficient)
minor_coef_table <- cbind(minor_coef_table, sm1$estimates$estimate)
colnames(minor_coef_table) <- c("curves","minor_coef", "minor_estimates")
#I need to create a "table" ('pdata') with a calculated column "SLOPE_per_CURVE" in a way that it is independent of the number of levels of CURVE in the data (spinach, in the example).
pdata <- spinach %>%
group_by(CURVE, HERBICIDE) %>%
expand(DOSE=exp(seq(-5, 5, length=50)))
#One of the conditions is the fct used in the model [LL.4() or LL.3()]
#Other conditions are the curve IDs
ncurves <- length(levels(spinach$CURVE))
#This is an IDEA of what I need, but it is not working as the column "SLOPE_per_CURVE is not created
pdata <- pdata %>% mutate(
SLOPE_per_CURVE =
for(i in 1:ncurves){
case_when(
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[i] ~ minor_coef_table[(ncurves+i),3]+((minor_coef_table[(2*ncurves+i),3]-minor_coef_table[(ncurves+i),3])/((1+ exp(minor_coef_table[i,3] * (log(DOSE) - log(minor_coef_table[(3*ncurves+i),3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[i] ~ ((minor_coef_table[(ncurves+i),3])/((1+ exp(minor_coef_table[i,3] * (log(DOSE) - log(minor_coef_table[(2*ncurves+i),3]))))))
)
}
)
#The bellow code gives what I need and is an example of the final desired result, but it is not independent of the number of levels in CURVE as I have to write every condition.
pdata <- pdata %>% mutate(
SLOPE_per_CURVE =
case_when(
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[1] ~ minor_coef_table[6,3]+((minor_coef_table[11,3]-minor_coef_table[6,3])/((1+ exp(minor_coef_table[1,3] * (log(DOSE) - log(minor_coef_table[16,3])))))),
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[2] ~ minor_coef_table[7,3]+((minor_coef_table[12,3]-minor_coef_table[7,3])/((1+ exp(minor_coef_table[2,3] * (log(DOSE) - log(minor_coef_table[17,3])))))),
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[3] ~ minor_coef_table[8,3]+((minor_coef_table[13,3]-minor_coef_table[8,3])/((1+ exp(minor_coef_table[3,3] * (log(DOSE) - log(minor_coef_table[18,3])))))),
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[4] ~ minor_coef_table[9,3]+((minor_coef_table[14,3]-minor_coef_table[9,3])/((1+ exp(minor_coef_table[4,3] * (log(DOSE) - log(minor_coef_table[19,3])))))),
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[5] ~ minor_coef_table[10,3]+((minor_coef_table[15,3]-minor_coef_table[10,3])/((1+ exp(minor_coef_table[5,3] * (log(DOSE) - log(minor_coef_table[20,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[1] ~ ((minor_coef_table[6,3])/((1+ exp(minor_coef_table[1,3] * (log(DOSE) - log(minor_coef_table[11,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[2] ~ ((minor_coef_table[7,3])/((1+ exp(minor_coef_table[2,3] * (log(DOSE) - log(minor_coef_table[12,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[3] ~ ((minor_coef_table[8,3])/((1+ exp(minor_coef_table[3,3] * (log(DOSE) - log(minor_coef_table[13,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[4] ~ ((minor_coef_table[9,3])/((1+ exp(minor_coef_table[4,3] * (log(DOSE) - log(minor_coef_table[14,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[5] ~ ((minor_coef_table[10,3])/((1+ exp(minor_coef_table[5,3] * (log(DOSE) - log(minor_coef_table[15,3]))))))
)
)
编辑:
在M.Viking回答后,这是解决方案:
for(i in 1:ncurves){
pdata <- coalesce(pdata %>% mutate(
SLOPE_per_CURVE =
case_when(
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[i] ~ minor_coef_table[(ncurves+i),3]+((minor_coef_table[(2*ncurves+i),3]-minor_coef_table[(ncurves+i),3])/((1+ exp(minor_coef_table[i,3] * (log(DOSE) - log(minor_coef_table[(3*ncurves+i),3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[i] ~ ((minor_coef_table[(ncurves+i),3])/((1+ exp(minor_coef_table[i,3] * (log(DOSE) - log(minor_coef_table[(2*ncurves+i),3]))))))
)
), pdata)
}
我用 iris 数据集
制作了一个 for
循环 mutate
case_when
的玩具示例
第一个复杂化是在一个简单的 for (j in 1:10){out<-j}
循环中,输出数据被 j
的每个后续迭代覆盖,最后,只有 10th
的结果运行 被保留。
然后我了解了coalesce()
函数;它结合(合并?联合?加入?)等长的稀疏数据(类似于 Microsoft Excel 的特殊粘贴“跳过空白”转换)。
在下面的示例代码中,我们取 iris 数据集,并从 1 循环到 10,如果 iris$Sepal.Length
(又名 iris[,1]
)等于循环迭代 j
,我们将结果变量(斜率)更改为 100+j
.
iris4 <- tibble(iris)
for (j in 1:10) {
iris4 <- coalesce(iris4 %>% mutate(Slope = case_when(Sepal.Length == j ~ 100+j)),iris4)}
table(iris4$Slope)
我正在尝试使用 mutate() 和 case_when() 在 R 中创建具有值的 table,因为 mutate() 的计算可能因 [= 的多个条件而异23=]()。但是 case_when() 的多个条件的数量也可能因我使用的模型而异。 下面是一个可重现的例子(来源:https://doseresponse.github.io/medrc/articles/medrc.html):
library(medrc)
library(dplyr)
library(tidyr)
data(spinach)
spinach$CURVE <- as.factor(spinach$CURVE)
#I can have a model using LL.4() or LL.3() function (as in 'fct' bellow)
sm1 <- metadrm(SLOPE ~ DOSE,
data=spinach,
fct=LL.3(),
ind=CURVE,
cid2=HERBICIDE,
struct="UN")
#or in other situations
sm1 <- metadrm(SLOPE ~ DOSE,
data=spinach,
fct=LL.4(),
ind=CURVE,
cid2=HERBICIDE,
struct="UN")
#Extracting the coefficients from the model
minor_coef_table <- as.data.frame(sm1$estimates$ind)
minor_coef_table <- cbind(minor_coef_table, sm1$estimates$coefficient)
minor_coef_table <- cbind(minor_coef_table, sm1$estimates$estimate)
colnames(minor_coef_table) <- c("curves","minor_coef", "minor_estimates")
#I need to create a "table" ('pdata') with a calculated column "SLOPE_per_CURVE" in a way that it is independent of the number of levels of CURVE in the data (spinach, in the example).
pdata <- spinach %>%
group_by(CURVE, HERBICIDE) %>%
expand(DOSE=exp(seq(-5, 5, length=50)))
#One of the conditions is the fct used in the model [LL.4() or LL.3()]
#Other conditions are the curve IDs
ncurves <- length(levels(spinach$CURVE))
#This is an IDEA of what I need, but it is not working as the column "SLOPE_per_CURVE is not created
pdata <- pdata %>% mutate(
SLOPE_per_CURVE =
for(i in 1:ncurves){
case_when(
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[i] ~ minor_coef_table[(ncurves+i),3]+((minor_coef_table[(2*ncurves+i),3]-minor_coef_table[(ncurves+i),3])/((1+ exp(minor_coef_table[i,3] * (log(DOSE) - log(minor_coef_table[(3*ncurves+i),3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[i] ~ ((minor_coef_table[(ncurves+i),3])/((1+ exp(minor_coef_table[i,3] * (log(DOSE) - log(minor_coef_table[(2*ncurves+i),3]))))))
)
}
)
#The bellow code gives what I need and is an example of the final desired result, but it is not independent of the number of levels in CURVE as I have to write every condition.
pdata <- pdata %>% mutate(
SLOPE_per_CURVE =
case_when(
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[1] ~ minor_coef_table[6,3]+((minor_coef_table[11,3]-minor_coef_table[6,3])/((1+ exp(minor_coef_table[1,3] * (log(DOSE) - log(minor_coef_table[16,3])))))),
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[2] ~ minor_coef_table[7,3]+((minor_coef_table[12,3]-minor_coef_table[7,3])/((1+ exp(minor_coef_table[2,3] * (log(DOSE) - log(minor_coef_table[17,3])))))),
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[3] ~ minor_coef_table[8,3]+((minor_coef_table[13,3]-minor_coef_table[8,3])/((1+ exp(minor_coef_table[3,3] * (log(DOSE) - log(minor_coef_table[18,3])))))),
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[4] ~ minor_coef_table[9,3]+((minor_coef_table[14,3]-minor_coef_table[9,3])/((1+ exp(minor_coef_table[4,3] * (log(DOSE) - log(minor_coef_table[19,3])))))),
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[5] ~ minor_coef_table[10,3]+((minor_coef_table[15,3]-minor_coef_table[10,3])/((1+ exp(minor_coef_table[5,3] * (log(DOSE) - log(minor_coef_table[20,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[1] ~ ((minor_coef_table[6,3])/((1+ exp(minor_coef_table[1,3] * (log(DOSE) - log(minor_coef_table[11,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[2] ~ ((minor_coef_table[7,3])/((1+ exp(minor_coef_table[2,3] * (log(DOSE) - log(minor_coef_table[12,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[3] ~ ((minor_coef_table[8,3])/((1+ exp(minor_coef_table[3,3] * (log(DOSE) - log(minor_coef_table[13,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[4] ~ ((minor_coef_table[9,3])/((1+ exp(minor_coef_table[4,3] * (log(DOSE) - log(minor_coef_table[14,3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[5] ~ ((minor_coef_table[10,3])/((1+ exp(minor_coef_table[5,3] * (log(DOSE) - log(minor_coef_table[15,3]))))))
)
)
编辑:
在M.Viking回答后,这是解决方案:
for(i in 1:ncurves){
pdata <- coalesce(pdata %>% mutate(
SLOPE_per_CURVE =
case_when(
sm1$fct$name == "LL.4" & CURVE == levels(pdata$CURVE)[i] ~ minor_coef_table[(ncurves+i),3]+((minor_coef_table[(2*ncurves+i),3]-minor_coef_table[(ncurves+i),3])/((1+ exp(minor_coef_table[i,3] * (log(DOSE) - log(minor_coef_table[(3*ncurves+i),3])))))),
sm1$fct$name == "LL.3" & CURVE == levels(pdata$CURVE)[i] ~ ((minor_coef_table[(ncurves+i),3])/((1+ exp(minor_coef_table[i,3] * (log(DOSE) - log(minor_coef_table[(2*ncurves+i),3]))))))
)
), pdata)
}
我用 iris 数据集
制作了一个for
循环 mutate
case_when
的玩具示例
第一个复杂化是在一个简单的 for (j in 1:10){out<-j}
循环中,输出数据被 j
的每个后续迭代覆盖,最后,只有 10th
的结果运行 被保留。
然后我了解了coalesce()
函数;它结合(合并?联合?加入?)等长的稀疏数据(类似于 Microsoft Excel 的特殊粘贴“跳过空白”转换)。
在下面的示例代码中,我们取 iris 数据集,并从 1 循环到 10,如果 iris$Sepal.Length
(又名 iris[,1]
)等于循环迭代 j
,我们将结果变量(斜率)更改为 100+j
.
iris4 <- tibble(iris)
for (j in 1:10) {
iris4 <- coalesce(iris4 %>% mutate(Slope = case_when(Sepal.Length == j ~ 100+j)),iris4)}
table(iris4$Slope)