我应该如何使用嵌套的 ifelse 语句?
How should I use nested ifelse statements?
我用以下数据创建了一个数据框
# Sample data: ten people labelled A-J, with their age and sex.
name <- LETTERS[1:10]
age <- c(22, 43, 12, 17, 29, 5, 51, 56, 9, 44)
sex <- c("M", "F", "M", "M", "M", "F", "F", "M", "F", "F")
# Columns are named explicitly; character columns are stored as factors.
rock <- data.frame(name = name, age = age, sex = sex, stringsAsFactors = TRUE)
rock
现在我想知道:
如果姓名为E至J且性别不等于F则状态为“1F”,如果姓名为A至D且年龄大于15则状态为"Young"。其他都是 "Others"
为此,我正在应用以下代码:
# NOTE: `==` compares element by element and recycles the shorter vector, so
# each name is checked against only one letter of the set (chosen by position)
# rather than against the whole set. That is why rows E-J end up as "Others".
rock$status <- ifelse(rock$name==c("E","F","G","H","I","J")&
rock$sex!="F","1F",
ifelse(rock$name==c("E","F","G","H","I","J")&rock$sex=="F","Fenamle",
ifelse(rock$name==c("A","B","C","D") & rock$age>15,"Young","Others")))
rock
但我得到的输出如下:
name age sex status
1 A 22 M Young
2 B 43 F Young
3 C 12 M Others
4 D 17 M Young
5 E 29 M Others
6 F 5 F Others
7 G 51 F Others
8 H 56 M Others
9 I 9 F Others
10 J 44 F Others
但是,E 和 H 的状态应该是“1F”,而它显示的却是 "Others"。
我的代码做错了什么?
请大家指正,并给我一些宝贵的建议。
我们需要使用 %in% 而不是 ==:== 会按位置逐元素比较(并循环补齐较短的向量),而 %in% 检查每个值是否属于给定的集合:
# Classify every row; %in% tests membership in the whole set of names.
rock$status <- with(rock, {
  is_e_to_j <- name %in% LETTERS[5:10]
  is_a_to_d <- name %in% LETTERS[1:4]
  ifelse(
    is_e_to_j & sex != "F", "1F",
    ifelse(
      is_e_to_j & sex == "F", "Female",
      ifelse(is_a_to_d & age > 15, "Young", "Others")
    )
  )
})
rock
# name age sex status
# 1 A 22 M Young
# 2 B 43 F Young
# 3 C 12 M Others
# 4 D 17 M Young
# 5 E 29 M 1F
# 6 F 5 F Female
# 7 G 51 F Female
# 8 H 56 M 1F
# 9 I 9 F Female
# 10 J 44 F Female
使用 data.table 你可以:
library(data.table)
rock <- data.table(rock)
# Start every row at the fallback label, then overwrite the rows that match a
# rule. Filling in NAs afterwards would silently do nothing when `rock`
# already carries a `status` column from an earlier snippet.
rock[, status := "Other"]
rock[name %in% LETTERS[5:10] & sex != "F", status := "1F"]
rock[name %in% LETTERS[1:4] & age > 15, status := "Young"]
rock
# name age sex status
# 1: A 22 M Young
# 2: B 43 F Young
# 3: C 12 M Other
# 4: D 17 M Young
# 5: E 29 M 1F
# 6: F 5 F Other
# 7: G 51 F Other
# 8: H 56 M 1F
# 9: I 9 F Other
# 10: J 44 F Other
在这种情况下,我通常更喜欢先预先计算好各个条件的逻辑索引,然后用这些索引的加权和作为下标,从一组互不相同的取值中选出结果。(在我看来)它比嵌套的 ifelse 更快、更易读。一个例子:
# One logical mask per rule; at most one of them is TRUE in any given row.
i1 <- with(rock, name %in% LETTERS[5:10] & sex != "F")
i2 <- with(rock, name %in% LETTERS[5:10] & sex == "F")
i3 <- with(rock, name %in% LETTERS[1:4] & age > 15)
# Collapse the masks into a position 1..4 and look the label up by position:
# 1 = no rule matched, 2 = i1, 3 = i2, 4 = i3.
rock$status <- c("Other", "1F", "Female", "Young")[1 + i1 + 2 * i2 + 3 * i3]
给出了想要的结果:
> rock
name age sex status
1 A 22 M Young
2 B 43 F Young
3 C 12 M Other
4 D 17 M Young
5 E 29 M 1F
6 F 5 F Female
7 G 51 F Female
8 H 56 M 1F
9 I 9 F Female
10 J 44 F Female
使用dplyr
的case_when()
函数的解决方案:
library(dplyr)

# Rebuild the sample data so this snippet runs on its own.
name <- LETTERS[1:10]
age <- c(22, 43, 12, 17, 29, 5, 51, 56, 9, 44)
sex <- c("M", "F", "M", "M", "M", "F", "F", "M", "F", "F")
rock <- data.frame(name, age, sex, stringsAsFactors = TRUE)

# Name sets used by the rules below.
name_condition_1 <- LETTERS[5:10]
name_condition_2 <- LETTERS[1:4]

# case_when() takes the first condition that holds; TRUE is the catch-all.
mutate(
  rock,
  status = case_when(
    name %in% name_condition_1 & sex != "F" ~ "1F",
    name %in% name_condition_1 & sex == "F" ~ "Female",
    name %in% name_condition_2 & age > 15 ~ "Young",
    TRUE ~ "Others"
  )
)
输出:
name age sex status
1 A 22 M Young
2 B 43 F Young
3 C 12 M Others
4 D 17 M Young
5 E 29 M 1F
6 F 5 F Female
7 G 51 F Female
8 H 56 M 1F
9 I 9 F Female
10 J 44 F Female
为了完整起见,这里还有一个解决方案,使用连接(join)和非等值连接(non-equi join)来更新 status 列:
library(data.table)
# Each `[...]` below is an update join: rows of `rock` that match the lookup
# table given as the first argument get `status` assigned by `:=`.
# NOTE(review): steps 3 and 4 key on NA in `status`, so this presumably
# expects `rock` to have no pre-filled `status` column - confirm before reuse.
# Step 1: names A-D with age > 15 become "Young" (non-equi join on age).
setDT(rock)[.(name = LETTERS[1:4], age = 15), on = .(name, age > age), status := "Young"][
# Step 2: names E-J with sex "F" become "Female".
.(name = LETTERS[5:10], sex = "F"), on = .(name, sex), status := "Female"][
# Step 3: names E-J whose status is still NA become "1F".
.(name = LETTERS[5:10], status = NA_character_), on = .(name, status), status := "1F"][
# Step 4: every row still NA becomes "Other"; the trailing `[]` prints the result.
.(status = NA_character_), on = .(status), status := "Other"][]
name age sex status
1: A 22 M Young
2: B 43 F Young
3: C 12 M Other
4: D 17 M Young
5: E 29 M 1F
6: F 5 F Female
7: G 51 F Female
8: H 56 M 1F
9: I 9 F Female
10: J 44 F Female
不幸的是,非相等连接不适用于不相等的运算符 !=
。所以,
# Intentionally failing example: the `sex != sex` condition in `on` is what
# triggers the error, because `!=` is not supported in non-equi joins.
setDT(rock)[.(name = LETTERS[1:4], age = 15), on = .(name, age > age), status := "Young"][
.(name = LETTERS[5:10], sex = "F"), on = .(name, sex != sex), status := "1F"][]
会给出错误信息。因此,我必须先按 name 和 sex 进行连接,将 status 设置为 Female,然后再检查 status 中的 NA,以得到其补集(即其余的行)。
但是,还有另一种解决方法,使用两个非相等连接代替:
# Workaround for the missing `!=`: after the "Young" update, two non-equi
# joins against sex = "F" (one with `<`, one with `>`) both assign "1F", so
# together they cover every E-J row whose sex differs from "F".
# Only "Young" and "1F" are assigned in this chain.
setDT(rock)[.(name = LETTERS[1:4], age = 15), on = .(name, age > age), status := "Young"][
.(name = LETTERS[5:10], sex = "F"), on = .(name, sex < sex), status := "1F"][
.(name = LETTERS[5:10], sex = "F"), on = .(name, sex > sex), status := "1F"][]
# Base R variant that implements only the rules stated in the question:
# "Young" for names A-D older than 15, "1F" for names E-J whose sex is not "F",
# and "Others" for every remaining row.
# It operates on `rock`, the data frame used throughout; `data` was never
# defined and would resolve to a function, not a data frame.
rock$status <- ifelse(
  rock$name %in% c("A", "B", "C", "D") & rock$age > 15,
  "Young",
  ifelse(
    rock$sex != "F" & rock$name %in% c("E", "F", "G", "H", "I", "J"),
    "1F",
    "Others"
  )
)
rock
我用以下数据创建了一个数据框
# Sample data: ten people labelled A-J, with their age and sex.
name <- LETTERS[1:10]
age <- c(22, 43, 12, 17, 29, 5, 51, 56, 9, 44)
sex <- c("M", "F", "M", "M", "M", "F", "F", "M", "F", "F")
# Columns are named explicitly; character columns are stored as factors.
rock <- data.frame(name = name, age = age, sex = sex, stringsAsFactors = TRUE)
rock
现在我想知道:
如果姓名为E至J且性别不等于F则状态为“1F”,如果姓名为A至D且年龄大于15则状态为"Young"。其他都是 "Others"
为此,我正在应用以下代码:
# NOTE: `==` compares element by element and recycles the shorter vector, so
# each name is checked against only one letter of the set (chosen by position)
# rather than against the whole set. That is why rows E-J end up as "Others".
rock$status <- ifelse(rock$name==c("E","F","G","H","I","J")&
rock$sex!="F","1F",
ifelse(rock$name==c("E","F","G","H","I","J")&rock$sex=="F","Fenamle",
ifelse(rock$name==c("A","B","C","D") & rock$age>15,"Young","Others")))
rock
但我得到的输出如下:
name age sex status
1 A 22 M Young
2 B 43 F Young
3 C 12 M Others
4 D 17 M Young
5 E 29 M Others
6 F 5 F Others
7 G 51 F Others
8 H 56 M Others
9 I 9 F Others
10 J 44 F Others
但是,E 和 H 的状态应该是“1F”,而它显示的却是 "Others"。
我的代码做错了什么?
请大家指正,并给我一些宝贵的建议。
我们需要使用 %in% 而不是 ==:== 会按位置逐元素比较(并循环补齐较短的向量),而 %in% 检查每个值是否属于给定的集合:
# Classify every row; %in% tests membership in the whole set of names.
rock$status <- with(rock, {
  is_e_to_j <- name %in% LETTERS[5:10]
  is_a_to_d <- name %in% LETTERS[1:4]
  ifelse(
    is_e_to_j & sex != "F", "1F",
    ifelse(
      is_e_to_j & sex == "F", "Female",
      ifelse(is_a_to_d & age > 15, "Young", "Others")
    )
  )
})
rock
# name age sex status
# 1 A 22 M Young
# 2 B 43 F Young
# 3 C 12 M Others
# 4 D 17 M Young
# 5 E 29 M 1F
# 6 F 5 F Female
# 7 G 51 F Female
# 8 H 56 M 1F
# 9 I 9 F Female
# 10 J 44 F Female
使用 data.table 你可以:
library(data.table)
rock <- data.table(rock)
# Start every row at the fallback label, then overwrite the rows that match a
# rule. Filling in NAs afterwards would silently do nothing when `rock`
# already carries a `status` column from an earlier snippet.
rock[, status := "Other"]
rock[name %in% LETTERS[5:10] & sex != "F", status := "1F"]
rock[name %in% LETTERS[1:4] & age > 15, status := "Young"]
rock
# name age sex status
# 1: A 22 M Young
# 2: B 43 F Young
# 3: C 12 M Other
# 4: D 17 M Young
# 5: E 29 M 1F
# 6: F 5 F Other
# 7: G 51 F Other
# 8: H 56 M 1F
# 9: I 9 F Other
# 10: J 44 F Other
在这种情况下,我通常更喜欢先预先计算好各个条件的逻辑索引,然后用这些索引的加权和作为下标,从一组互不相同的取值中选出结果。(在我看来)它比嵌套的 ifelse 更快、更易读。一个例子:
# One logical mask per rule; at most one of them is TRUE in any given row.
i1 <- with(rock, name %in% LETTERS[5:10] & sex != "F")
i2 <- with(rock, name %in% LETTERS[5:10] & sex == "F")
i3 <- with(rock, name %in% LETTERS[1:4] & age > 15)
# Collapse the masks into a position 1..4 and look the label up by position:
# 1 = no rule matched, 2 = i1, 3 = i2, 4 = i3.
rock$status <- c("Other", "1F", "Female", "Young")[1 + i1 + 2 * i2 + 3 * i3]
给出了想要的结果:
> rock
name age sex status
1 A 22 M Young
2 B 43 F Young
3 C 12 M Other
4 D 17 M Young
5 E 29 M 1F
6 F 5 F Female
7 G 51 F Female
8 H 56 M 1F
9 I 9 F Female
10 J 44 F Female
使用dplyr
的case_when()
函数的解决方案:
library(dplyr)

# Rebuild the sample data so this snippet runs on its own.
name <- LETTERS[1:10]
age <- c(22, 43, 12, 17, 29, 5, 51, 56, 9, 44)
sex <- c("M", "F", "M", "M", "M", "F", "F", "M", "F", "F")
rock <- data.frame(name, age, sex, stringsAsFactors = TRUE)

# Name sets used by the rules below.
name_condition_1 <- LETTERS[5:10]
name_condition_2 <- LETTERS[1:4]

# case_when() takes the first condition that holds; TRUE is the catch-all.
mutate(
  rock,
  status = case_when(
    name %in% name_condition_1 & sex != "F" ~ "1F",
    name %in% name_condition_1 & sex == "F" ~ "Female",
    name %in% name_condition_2 & age > 15 ~ "Young",
    TRUE ~ "Others"
  )
)
输出:
name age sex status
1 A 22 M Young
2 B 43 F Young
3 C 12 M Others
4 D 17 M Young
5 E 29 M 1F
6 F 5 F Female
7 G 51 F Female
8 H 56 M 1F
9 I 9 F Female
10 J 44 F Female
为了完整起见,这里还有一个解决方案,使用连接(join)和非等值连接(non-equi join)来更新 status 列:
library(data.table)
# Each `[...]` below is an update join: rows of `rock` that match the lookup
# table given as the first argument get `status` assigned by `:=`.
# NOTE(review): steps 3 and 4 key on NA in `status`, so this presumably
# expects `rock` to have no pre-filled `status` column - confirm before reuse.
# Step 1: names A-D with age > 15 become "Young" (non-equi join on age).
setDT(rock)[.(name = LETTERS[1:4], age = 15), on = .(name, age > age), status := "Young"][
# Step 2: names E-J with sex "F" become "Female".
.(name = LETTERS[5:10], sex = "F"), on = .(name, sex), status := "Female"][
# Step 3: names E-J whose status is still NA become "1F".
.(name = LETTERS[5:10], status = NA_character_), on = .(name, status), status := "1F"][
# Step 4: every row still NA becomes "Other"; the trailing `[]` prints the result.
.(status = NA_character_), on = .(status), status := "Other"][]
name age sex status
1: A 22 M Young
2: B 43 F Young
3: C 12 M Other
4: D 17 M Young
5: E 29 M 1F
6: F 5 F Female
7: G 51 F Female
8: H 56 M 1F
9: I 9 F Female
10: J 44 F Female
不幸的是,非相等连接不适用于不相等的运算符 !=
。所以,
# Intentionally failing example: the `sex != sex` condition in `on` is what
# triggers the error, because `!=` is not supported in non-equi joins.
setDT(rock)[.(name = LETTERS[1:4], age = 15), on = .(name, age > age), status := "Young"][
.(name = LETTERS[5:10], sex = "F"), on = .(name, sex != sex), status := "1F"][]
会给出错误信息。因此,我必须先按 name 和 sex 进行连接,将 status 设置为 Female,然后再检查 status 中的 NA,以得到其补集(即其余的行)。
但是,还有另一种解决方法,使用两个非相等连接代替:
# Workaround for the missing `!=`: after the "Young" update, two non-equi
# joins against sex = "F" (one with `<`, one with `>`) both assign "1F", so
# together they cover every E-J row whose sex differs from "F".
# Only "Young" and "1F" are assigned in this chain.
setDT(rock)[.(name = LETTERS[1:4], age = 15), on = .(name, age > age), status := "Young"][
.(name = LETTERS[5:10], sex = "F"), on = .(name, sex < sex), status := "1F"][
.(name = LETTERS[5:10], sex = "F"), on = .(name, sex > sex), status := "1F"][]
# Base R variant that implements only the rules stated in the question:
# "Young" for names A-D older than 15, "1F" for names E-J whose sex is not "F",
# and "Others" for every remaining row.
# It operates on `rock`, the data frame used throughout; `data` was never
# defined and would resolve to a function, not a data frame.
rock$status <- ifelse(
  rock$name %in% c("A", "B", "C", "D") & rock$age > 15,
  "Young",
  ifelse(
    rock$sex != "F" & rock$name %in% c("E", "F", "G", "H", "I", "J"),
    "1F",
    "Others"
  )
)
rock