重命名因子变量的所有级别,是否有一种 tidyverse 方法可以做到这一点?
Rename all levels of factor variable, is there a tidyverse way to do it?
我偶尔需要重命名因子变量的所有级别。我知道如何使用基础 R(base R)实现这一点,例如:levels(factor_variable) <- levels(new_variable)。但我真的很希望能用 tidyverse 来做这种事情。我查看了 dplyr 和 forcats,但没有找到任何解决方法。我希望能够实现示例 1 中的效果,但要使用 %>% 运算符。
示例 1,使用基础 R(有效)
# Example data: a factor whose levels are the 26 lowercase letters, plus a
# numeric column.
my_levels <- letters
sample_data <- data.frame(
  factor_data = factor(
    sample(my_levels, size = 500, replace = TRUE),
    levels = my_levels
  ),
  Any_other_data = rnorm(500)
)

# New level names: one random number per existing level.
my_new_levels <- rnorm(length(letters))

# Replace every level in one assignment. factor() sorts its levels, so the
# old levels (a, b, c, ...) are paired with the new values in ascending
# order, not in the order in which they appear in my_new_levels.
levels(sample_data$factor_data) <- levels(factor(my_new_levels))
示例 2,我使用 tidyverse 尝试但无效的一种做法
# Load the tidyverse, which this snippet relies on for tibble(), mutate(),
# recode() and %>%.
library(tidyverse)
# Same setup as the base R example, but stored in a tibble.
my_levels <- letters
sample_data <- tibble(factor_data = factor(sample(my_levels,size = 500,replace = T) ,
levels = my_levels),
Any_other_data = rnorm(500))
# One random number per existing level; these are meant to become the new
# level names.
my_new_levels <- rnorm(length(letters))
# Get error: an argument name in a call must be a symbol or a string, so
# `levels(factor_data) = ...` inside mutate() is rejected by the R parser.
sample_data <- sample_data %>%
mutate(levels(factor_data) = levels(factor(my_new_levels)))
# Get error: same problem, this time in the arguments passed to recode().
sample_data <- sample_data %>%
mutate(factor_data = recode(factor_data, levels(factor_data) = levels(factor(my_new_levels))))
我也尝试了 recode,但它不仅需要手动操作(每次只能处理一个值),而且也无法与 %>% 运算符配合使用。以下是我为了弄清发生了什么而做的一些尝试:
# Piping the data frame straight into recode(): the pipe supplies
# `sample_data` itself as the first argument, not the factor_data column.
sample_data <- sample_data %>%
recode(factor_data, a = '-2.5')
# Same attempt with recode_factor(); again the data frame is the first
# argument.
sample_data <- sample_data %>%
recode_factor(factor_data, a = '-2.5')
# Not valid R: a call (`levels(...)`) cannot be used as an argument name on
# the left of `=`.
recode(sample_data$factor_data, levels(sample_data$factor_data) = levels(factor(my_new_levels)))
# Calling the functions on the column directly, replacing the single level
# "a" with "-2.5".
recode(sample_data$factor_data, a = '-2.5')
recode_factor(sample_data$factor_data, a = '-2.5')
您可以使用命名向量配合 forcats::fct_recode() 轻松做到这一点:
library(tidyverse)

# Fix the RNG seed so the example is reproducible.
set.seed(42)

# Example data: a factor whose levels are the 26 lowercase letters, plus a
# numeric column.
my_levels <- letters
sample_data <- data.frame(
  factor_data = factor(
    sample(my_levels, size = 500, replace = TRUE),
    levels = my_levels
  ),
  Any_other_data = rnorm(500)
)
my_new_levels <- rnorm(length(letters))

# create a named vector with the new levels:
# fct_recode() takes pairs of the form `new level = "old level"`, so the
# values are the current levels and the names are the replacements.
named_level_vector <- levels(sample_data$factor_data)
names(named_level_vector) <- my_new_levels

# use mutate and fct_recode with that vector; `!!!` splices the named vector
# into individual `new = "old"` arguments.
sample_data <- sample_data %>%
  mutate(
    new_factor_data = forcats::fct_recode(factor_data, !!!named_level_vector)
  )

head(sample_data)
#> factor_data Any_other_data new_factor_data
#> 1 q 0.48236947 0.223521215874458
#> 2 e 0.99294364 -1.12828853519737
#> 3 a -1.24639550 -2.55382485095083
#> 4 y -0.03348752 1.67099730539817
#> 5 j -0.07096218 -0.318990710826149
#> 6 d -0.75892065 -1.17990419995829
于 2020-06-11 由 reprex 包 (v0.3.0) 创建
我偶尔需要重命名因子变量的所有级别。我知道如何使用基础 R(base R)实现这一点,例如:levels(factor_variable) <- levels(new_variable)。但我真的很希望能用 tidyverse 来做这种事情。我查看了 dplyr 和 forcats,但没有找到任何解决方法。我希望能够实现示例 1 中的效果,但要使用 %>% 运算符。
示例 1,使用基础 R(有效)
# Example data: a factor whose levels are the 26 lowercase letters, plus a
# numeric column.
my_levels <- letters
sample_data <- data.frame(
  factor_data = factor(
    sample(my_levels, size = 500, replace = TRUE),
    levels = my_levels
  ),
  Any_other_data = rnorm(500)
)

# New level names: one random number per existing level.
my_new_levels <- rnorm(length(letters))

# Replace every level in one assignment. factor() sorts its levels, so the
# old levels (a, b, c, ...) are paired with the new values in ascending
# order, not in the order in which they appear in my_new_levels.
levels(sample_data$factor_data) <- levels(factor(my_new_levels))
示例 2,我使用 tidyverse 尝试但无效的一种做法
# Load the tidyverse, which this snippet relies on for tibble(), mutate(),
# recode() and %>%.
library(tidyverse)
# Same setup as the base R example, but stored in a tibble.
my_levels <- letters
sample_data <- tibble(factor_data = factor(sample(my_levels,size = 500,replace = T) ,
levels = my_levels),
Any_other_data = rnorm(500))
# One random number per existing level; these are meant to become the new
# level names.
my_new_levels <- rnorm(length(letters))
# Get error: an argument name in a call must be a symbol or a string, so
# `levels(factor_data) = ...` inside mutate() is rejected by the R parser.
sample_data <- sample_data %>%
mutate(levels(factor_data) = levels(factor(my_new_levels)))
# Get error: same problem, this time in the arguments passed to recode().
sample_data <- sample_data %>%
mutate(factor_data = recode(factor_data, levels(factor_data) = levels(factor(my_new_levels))))
我也尝试了 recode,但它不仅需要手动操作(每次只能处理一个值),而且也无法与 %>% 运算符配合使用。以下是我为了弄清发生了什么而做的一些尝试:
# Piping the data frame straight into recode(): the pipe supplies
# `sample_data` itself as the first argument, not the factor_data column.
sample_data <- sample_data %>%
recode(factor_data, a = '-2.5')
# Same attempt with recode_factor(); again the data frame is the first
# argument.
sample_data <- sample_data %>%
recode_factor(factor_data, a = '-2.5')
# Not valid R: a call (`levels(...)`) cannot be used as an argument name on
# the left of `=`.
recode(sample_data$factor_data, levels(sample_data$factor_data) = levels(factor(my_new_levels)))
# Calling the functions on the column directly, replacing the single level
# "a" with "-2.5".
recode(sample_data$factor_data, a = '-2.5')
recode_factor(sample_data$factor_data, a = '-2.5')
您可以使用命名向量配合 forcats::fct_recode() 轻松做到这一点:
library(tidyverse)

# Fix the RNG seed so the example is reproducible.
set.seed(42)

# Example data: a factor whose levels are the 26 lowercase letters, plus a
# numeric column.
my_levels <- letters
sample_data <- data.frame(
  factor_data = factor(
    sample(my_levels, size = 500, replace = TRUE),
    levels = my_levels
  ),
  Any_other_data = rnorm(500)
)
my_new_levels <- rnorm(length(letters))

# create a named vector with the new levels:
# fct_recode() takes pairs of the form `new level = "old level"`, so the
# values are the current levels and the names are the replacements.
named_level_vector <- levels(sample_data$factor_data)
names(named_level_vector) <- my_new_levels

# use mutate and fct_recode with that vector; `!!!` splices the named vector
# into individual `new = "old"` arguments.
sample_data <- sample_data %>%
  mutate(
    new_factor_data = forcats::fct_recode(factor_data, !!!named_level_vector)
  )

head(sample_data)
#> factor_data Any_other_data new_factor_data
#> 1 q 0.48236947 0.223521215874458
#> 2 e 0.99294364 -1.12828853519737
#> 3 a -1.24639550 -2.55382485095083
#> 4 y -0.03348752 1.67099730539817
#> 5 j -0.07096218 -0.318990710826149
#> 6 d -0.75892065 -1.17990419995829
于 2020-06-11 由 reprex 包 (v0.3.0) 创建