替换嵌套 ifelse() 语句链的最快方法
the quickest way to replace a nested ifelse() statements chain
我有这一系列的嵌套语句
# Question's original approach: translate the numeric code in data$Country
# (1-19) into a country name with one nested ifelse() per code. Any code
# that matches none of the 19 conditions falls through to the final "l".
# The opening ifelse( for code 1 was missing from the snippet, so it could
# not be parsed; it is restored here, with one closing parenthesis per call.
ifelse(data$Country == 1, "Brazil",
ifelse(data$Country == 2, "Canada",
ifelse(data$Country == 3, "China",
ifelse(data$Country == 4, "Ecuador",
ifelse(data$Country == 5, "France",
ifelse(data$Country == 6, "Germany",
ifelse(data$Country == 7, "India",
ifelse(data$Country == 8, "Italy",
ifelse(data$Country == 9, "Mexico",
ifelse(data$Country == 10, "Nigeria",
ifelse(data$Country == 11, "Poland",
ifelse(data$Country == 12, "Russia",
ifelse(data$Country == 13, "South Africa",
ifelse(data$Country == 14, "South Korea",
ifelse(data$Country == 15, "Singapore",
ifelse(data$Country == 16, "Spain",
ifelse(data$Country == 17, "Sweden",
ifelse(data$Country == 18, "United Kingdom",
ifelse(data$Country == 19, "United States", "l"
)))))))))))))))))))
我一直在寻找将任何编码变量转换为相应国家/地区名称的最快方法。你觉得有什么办法可以应对这种操作吗?
非常感谢
我不确定所需的用途。但也许您可以尝试使用命名向量。这不是最优雅的解决方案,尽管它解决了 ifelse 混乱问题;)
4 个国家的示例。中国 = "4"
# Lookup vector: the values are country names, the names are the codes 2..5
# (stored as character strings, because vector names are always character).
countrys <- setNames(
  c("Brazil", "Canada", "China", "Ecuador"),
  2:5
)
# One-row test data.frame holding the numeric code 4.
data <- data.frame(country = 4)
# Index by NAME, not by position: the numeric 4 must be converted to "4"
# first, otherwise countrys[4] would pick the fourth element instead.
# unname() drops the code label from the result.
unname(countrys[as.character(data[["country"]])])
有2个选项:
1: case_when
来自 dplyr
library(dplyr)
# Demo: five rows, each carrying a numeric country code in country_id.
# case_when() checks the conditions top to bottom and uses the first one
# that holds; rows matching none of the codes hit the final TRUE branch
# and are labelled "Unknown".
mutate(
  data.frame(info = letters[1:5], country_id = 1:5),
  country_name = case_when(
    country_id == 1 ~ "Brazil",
    country_id == 2 ~ "Canada",
    country_id == 3 ~ "China",
    country_id == 4 ~ "Ecuador",
    country_id == 5 ~ "France",
    TRUE ~ "Unknown"
  )
)
info country_id country_name
1 a 1 Brazil
2 b 2 Canada
3 c 3 China
4 d 4 Ecuador
5 e 5 France
2: 合并或加入一个国家的信息 table:
# Lookup table: one row per country code with its name.
countries <- data.frame(
  country_id = 1:5,
  country_name = c("Brazil", "Canada", "China", "Ecuador", "France")
)
# Attach the name to every data row by joining on the shared country_id
# column; left_join() keeps all rows of the first table.
left_join(
  data.frame(info = letters[1:5], country_id = 1:5),
  countries,
  by = "country_id"
)
info country_id country_name
1 a 1 Brazil
2 b 2 Canada
3 c 3 China
4 d 4 Ecuador
5 e 5 France
我的偏好是 2,更少的编码和更少的错误机会。您可以将国家/地区 table 保存在您的数据库或某个文件中并进行维护,而无需更改代码。
这是使用 switch 语句的一个很好的案例。在我看来,它比 dplyr::case_when 或一连串 ifelse 的代码更具可读性,并且易于扩展,例如以后还需要处理地区、城市等其他条件时。
#' Map numeric country codes to country names.
#'
#' @param x Vector of country codes; each element is converted with
#'   as.character() and matched against the cases "1" to "19".
#' @return An unnamed character vector with one element per element of `x`.
#'   Codes without a matching case yield NA_character_.
get_country <- function(x) {
  # vapply() with a character(1) template guarantees a character vector.
  # The earlier Vectorize() wrapper returned list() for empty input and a
  # logical NA vector when no code matched, so the column type could change
  # with the data.
  vapply(x, function(code) {
    switch(as.character(code),
      "1" = "Brazil", "2" = "Canada", "3" = "China", "4" = "Ecuador",
      "5" = "France", "6" = "Germany", "7" = "India", "8" = "Italy",
      "9" = "Mexico", "10" = "Nigeria", "11" = "Poland", "12" = "Russia",
      "13" = "South Africa", "14" = "South Korea", "15" = "Singapore",
      "16" = "Spain", "17" = "Sweden", "18" = "United Kingdom",
      "19" = "United States",
      NA_character_
    )
  }, character(1), USE.NAMES = FALSE)
}
# Demo: add a country column by passing the whole country_id column to
# get_country().
mutate(
  data.frame(info = letters[1:5], country_id = 1:5),
  country = get_country(country_id)
)
info country_id country
1 a 1 Brazil
2 b 2 Canada
3 c 3 China
4 d 4 Ecuador
5 e 5 France
但是像这样的长语句需要大量的打字工作。另一种更动态的方法是:用一个接收输入向量的构造函数来生成 switch 语句。这里我使用 rworldmap 包中的 countryExData 数据集,为 149 个国家/地区创建表达式。
#' Build an expression that defines a vectorised id -> name lookup.
#'
#' The returned expression, once passed to eval(), evaluates
#' `Vectorize(function(x) switch(as.character(x), "<id>" = "<name>", ..., NA))`
#' with one switch case per element of `ids`.
#'
#' @param ids Vector of ids; converted with as.character() to form the case
#'   labels.
#' @param names Vector of names, matched to `ids` by position. Must be at
#'   least as long as `ids`; extra elements are ignored.
#' @return An expression vector of length one, intended for eval().
constructor <- function(ids, names) {
  keys <- as.character(ids)
  # A shorter `names` would silently produce missing labels for the tail
  # of `ids`, so fail early instead.
  stopifnot(length(names) >= length(keys))
  labels <- as.character(names)[seq_along(keys)]
  # Build the call from language objects rather than pasting and parsing
  # source text: names containing quotes or backslashes need no escaping,
  # and NA names stay NA instead of becoming the string "NA".
  cases <- stats::setNames(as.list(labels), keys)
  switch_call <- as.call(c(
    list(quote(switch), quote(as.character(x))),
    cases,
    list(NA)
  ))
  # call("function", <formals>, <body>) is the unevaluated form of
  # `function(x) <body>`; evaluating it creates the closure.
  lookup_fn <- call("function", as.pairlist(alist(x = )), switch_call)
  as.expression(call("Vectorize", lookup_fn))
}
# Build the lookup from the rworldmap::countryExData country names, using
# the ids 1..149 as codes.
get_country <- eval(
  constructor(1:149, trimws(rworldmap::countryExData$Country))
)
# Fixed seed so the sampled demo rows are reproducible.
set.seed(1)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
data.frame(
  info = sample(letters, size = 5, replace = TRUE),
  country_id = sample.int(149, 5, replace = TRUE)
) %>%
  mutate(country = get_country(country_id))
info country_id country
1 y 122 Sierra Leone
2 h 39 Algeria
3 l 42 Eritrea
4 y 134 Trinidad & Tobago
5 w 24 Chile
为了展示力量 - 让我们用另外 2 行代码为约 20000 个城市创建一个控制流
# Keep cities with a population above 10000, largest first; the row
# position in CITIES then serves as the city id.
CITIES <- maps::world.cities %>%
  filter(pop > 10000) %>%
  arrange(desc(pop))
# seq_len() stays correct even if the filter leaves zero rows.
get_city <- eval(constructor(seq_len(nrow(CITIES)), trimws(CITIES$name)))
# Sample ids only from the ranges the lookups cover. City ids come from the
# rows of CITIES rather than a hard-coded count. Country ids come from
# 1..149, matching the get_country() built from constructor(1:149, ...)
# earlier in this file; ids above 149 would only produce NA.
data.frame(
  city_id = sample.int(nrow(CITIES), size = 100, replace = TRUE),
  country_id = sample.int(149, 100, replace = TRUE)
) %>%
  mutate(
    country = get_country(country_id),
    city = get_city(city_id)
  )
这种方法的一个好处是您可以轻松地优化控制流:只要对构造函数的输入向量进行排序,让最常出现的情况排在最前面即可。之后还可以编写支持嵌套调用的函数,例如 get_continent(get_country(get_city(x)))。
我有这一系列的嵌套语句
# Question's original approach: translate the numeric code in data$Country
# (1-19) into a country name with one nested ifelse() per code. Any code
# that matches none of the 19 conditions falls through to the final "l".
# The opening ifelse( for code 1 was missing from the snippet, so it could
# not be parsed; it is restored here, with one closing parenthesis per call.
ifelse(data$Country == 1, "Brazil",
ifelse(data$Country == 2, "Canada",
ifelse(data$Country == 3, "China",
ifelse(data$Country == 4, "Ecuador",
ifelse(data$Country == 5, "France",
ifelse(data$Country == 6, "Germany",
ifelse(data$Country == 7, "India",
ifelse(data$Country == 8, "Italy",
ifelse(data$Country == 9, "Mexico",
ifelse(data$Country == 10, "Nigeria",
ifelse(data$Country == 11, "Poland",
ifelse(data$Country == 12, "Russia",
ifelse(data$Country == 13, "South Africa",
ifelse(data$Country == 14, "South Korea",
ifelse(data$Country == 15, "Singapore",
ifelse(data$Country == 16, "Spain",
ifelse(data$Country == 17, "Sweden",
ifelse(data$Country == 18, "United Kingdom",
ifelse(data$Country == 19, "United States", "l"
)))))))))))))))))))
我一直在寻找将任何编码变量转换为相应国家/地区名称的最快方法。你觉得有什么办法可以应对这种操作吗?
非常感谢
我不确定所需的用途。但也许您可以尝试使用命名向量。这不是最优雅的解决方案,尽管它解决了 ifelse 混乱问题;)
4 个国家的示例。中国 = "4"
# Lookup vector: the values are country names, the names are the codes 2..5
# (stored as character strings, because vector names are always character).
countrys <- setNames(
  c("Brazil", "Canada", "China", "Ecuador"),
  2:5
)
# One-row test data.frame holding the numeric code 4.
data <- data.frame(country = 4)
# Index by NAME, not by position: the numeric 4 must be converted to "4"
# first, otherwise countrys[4] would pick the fourth element instead.
# unname() drops the code label from the result.
unname(countrys[as.character(data[["country"]])])
有2个选项:
1: case_when
来自 dplyr
library(dplyr)
# Demo: five rows, each carrying a numeric country code in country_id.
# case_when() checks the conditions top to bottom and uses the first one
# that holds; rows matching none of the codes hit the final TRUE branch
# and are labelled "Unknown".
mutate(
  data.frame(info = letters[1:5], country_id = 1:5),
  country_name = case_when(
    country_id == 1 ~ "Brazil",
    country_id == 2 ~ "Canada",
    country_id == 3 ~ "China",
    country_id == 4 ~ "Ecuador",
    country_id == 5 ~ "France",
    TRUE ~ "Unknown"
  )
)
info country_id country_name
1 a 1 Brazil
2 b 2 Canada
3 c 3 China
4 d 4 Ecuador
5 e 5 France
2: 合并或加入一个国家的信息 table:
# Lookup table: one row per country code with its name.
countries <- data.frame(
  country_id = 1:5,
  country_name = c("Brazil", "Canada", "China", "Ecuador", "France")
)
# Attach the name to every data row by joining on the shared country_id
# column; left_join() keeps all rows of the first table.
left_join(
  data.frame(info = letters[1:5], country_id = 1:5),
  countries,
  by = "country_id"
)
info country_id country_name
1 a 1 Brazil
2 b 2 Canada
3 c 3 China
4 d 4 Ecuador
5 e 5 France
我的偏好是 2,更少的编码和更少的错误机会。您可以将国家/地区 table 保存在您的数据库或某个文件中并进行维护,而无需更改代码。
这是使用 switch 语句的一个很好的案例。在我看来,它比 dplyr::case_when 或一连串 ifelse 的代码更具可读性,并且易于扩展,例如以后还需要处理地区、城市等其他条件时。
#' Map numeric country codes to country names.
#'
#' @param x Vector of country codes; each element is converted with
#'   as.character() and matched against the cases "1" to "19".
#' @return An unnamed character vector with one element per element of `x`.
#'   Codes without a matching case yield NA_character_.
get_country <- function(x) {
  # vapply() with a character(1) template guarantees a character vector.
  # The earlier Vectorize() wrapper returned list() for empty input and a
  # logical NA vector when no code matched, so the column type could change
  # with the data.
  vapply(x, function(code) {
    switch(as.character(code),
      "1" = "Brazil", "2" = "Canada", "3" = "China", "4" = "Ecuador",
      "5" = "France", "6" = "Germany", "7" = "India", "8" = "Italy",
      "9" = "Mexico", "10" = "Nigeria", "11" = "Poland", "12" = "Russia",
      "13" = "South Africa", "14" = "South Korea", "15" = "Singapore",
      "16" = "Spain", "17" = "Sweden", "18" = "United Kingdom",
      "19" = "United States",
      NA_character_
    )
  }, character(1), USE.NAMES = FALSE)
}
# Demo: add a country column by passing the whole country_id column to
# get_country().
mutate(
  data.frame(info = letters[1:5], country_id = 1:5),
  country = get_country(country_id)
)
info country_id country
1 a 1 Brazil
2 b 2 Canada
3 c 3 China
4 d 4 Ecuador
5 e 5 France
但是像这样的长语句需要大量的打字工作。另一种更动态的方法是:用一个接收输入向量的构造函数来生成 switch 语句。这里我使用 rworldmap 包中的 countryExData 数据集,为 149 个国家/地区创建表达式。
#' Build an expression that defines a vectorised id -> name lookup.
#'
#' The returned expression, once passed to eval(), evaluates
#' `Vectorize(function(x) switch(as.character(x), "<id>" = "<name>", ..., NA))`
#' with one switch case per element of `ids`.
#'
#' @param ids Vector of ids; converted with as.character() to form the case
#'   labels.
#' @param names Vector of names, matched to `ids` by position. Must be at
#'   least as long as `ids`; extra elements are ignored.
#' @return An expression vector of length one, intended for eval().
constructor <- function(ids, names) {
  keys <- as.character(ids)
  # A shorter `names` would silently produce missing labels for the tail
  # of `ids`, so fail early instead.
  stopifnot(length(names) >= length(keys))
  labels <- as.character(names)[seq_along(keys)]
  # Build the call from language objects rather than pasting and parsing
  # source text: names containing quotes or backslashes need no escaping,
  # and NA names stay NA instead of becoming the string "NA".
  cases <- stats::setNames(as.list(labels), keys)
  switch_call <- as.call(c(
    list(quote(switch), quote(as.character(x))),
    cases,
    list(NA)
  ))
  # call("function", <formals>, <body>) is the unevaluated form of
  # `function(x) <body>`; evaluating it creates the closure.
  lookup_fn <- call("function", as.pairlist(alist(x = )), switch_call)
  as.expression(call("Vectorize", lookup_fn))
}
# Build the lookup from the rworldmap::countryExData country names, using
# the ids 1..149 as codes.
get_country <- eval(
  constructor(1:149, trimws(rworldmap::countryExData$Country))
)
# Fixed seed so the sampled demo rows are reproducible.
set.seed(1)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
data.frame(
  info = sample(letters, size = 5, replace = TRUE),
  country_id = sample.int(149, 5, replace = TRUE)
) %>%
  mutate(country = get_country(country_id))
info country_id country
1 y 122 Sierra Leone
2 h 39 Algeria
3 l 42 Eritrea
4 y 134 Trinidad & Tobago
5 w 24 Chile
为了展示力量 - 让我们用另外 2 行代码为约 20000 个城市创建一个控制流
# Keep cities with a population above 10000, largest first; the row
# position in CITIES then serves as the city id.
CITIES <- maps::world.cities %>%
  filter(pop > 10000) %>%
  arrange(desc(pop))
# seq_len() stays correct even if the filter leaves zero rows.
get_city <- eval(constructor(seq_len(nrow(CITIES)), trimws(CITIES$name)))
# Sample ids only from the ranges the lookups cover. City ids come from the
# rows of CITIES rather than a hard-coded count. Country ids come from
# 1..149, matching the get_country() built from constructor(1:149, ...)
# earlier in this file; ids above 149 would only produce NA.
data.frame(
  city_id = sample.int(nrow(CITIES), size = 100, replace = TRUE),
  country_id = sample.int(149, 100, replace = TRUE)
) %>%
  mutate(
    country = get_country(country_id),
    city = get_city(city_id)
  )
这种方法的一个好处是您可以轻松地优化控制流:只要对构造函数的输入向量进行排序,让最常出现的情况排在最前面即可。之后还可以编写支持嵌套调用的函数,例如 get_continent(get_country(get_city(x)))。