用 dplyr 或 forcats 重新编码 NA 因子
Recode NA factor with dplyr or forcats
我正在尝试使用 tidyverse 中的 forcats 或 dplyr 包,将某个因子中的 NA 值重新编码为一个字符串水平。问题在于,我要更改的因子值本身就是 NA,所以运行时出现了警告,结果也没有改变。
我找到了这个问题 (R how to change one of the level to NA),但它讲的是把一个因子水平更改为 NA,而我想做的正好相反:把 NA 更改为其他值。
这是我尝试过的:
library(dplyr)
# Attempt 1 (dplyr): count rows and total the responses per Units level, then
# try to relabel the NA row as "No Response".
# NOTE(review): recode_factor() takes `old = new` pairs, so this call asks to
# turn a level named "No Response" into NA rather than the reverse; the NA row
# in the printed result that follows is left unchanged.
df %>%
group_by(Units) %>%
summarize(Frequency = n(),
Total = sum(Responses, na.rm = T)) %>%
mutate(Units = recode_factor(Units, "No Response" = NA_character_))
# A tibble: 5 x 3
Units Frequency Total
<fct> <int> <dbl>
1 (0,3] 4 8
2 (3,10] 5 31
3 (10,30] 2 38
4 (100,Inf] 3 673
5 NA 1 0
Warning messages:
1: Problem with `mutate()` input `Units`.
i Unknown levels in `f`: NA
i Input `Units` is `fct_recode(Units, `No Response` = NA_character_)`.
2: Unknown levels in `f`: NA
和
library(forcats)
# Attempt 2 (forcats): same per-level summary, relabelling with fct_recode().
# fct_recode() takes `new = "old"` pairs and only renames existing levels.
# NA is a missing value, not a level, which is why forcats reports
# "Unknown levels in `f`: NA" and the NA row stays as it was.
df %>%
group_by(Units) %>%
summarize(Frequency = n(),
Total = sum(Responses, na.rm = T)) %>%
mutate(Units = fct_recode(Units, "No Response" = NA_character_))
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 5 x 3
Units Frequency Total
<fct> <int> <dbl>
1 (0,3] 4 8
2 (3,10] 5 31
3 (10,30] 2 38
4 (100,Inf] 3 673
5 NA 1 0
示例数据:
# Example data: 15 rows with an ID, a numeric Responses value and a Units
# factor whose levels are interval labels. Row 12 is NA in both Responses
# and Units; the "(30,100]" level is declared but never used.
df <- structure(
  list(
    ID = c(
      "000002", "000008", "000009", "000018", "000021",
      "000033", "000045", "000051", "000064", "000067",
      "000070", "000072", "000074", "000088", "000112"
    ),
    Responses = c(18, 6, 300, 8, 7, 150, 6, 4, 2, 3, 20, NA, 223, 2, 1),
    Units = structure(
      c(3L, 2L, 5L, 2L, 2L, 5L, 2L, 2L, 1L, 1L, 3L, NA, 5L, 1L, 1L),
      levels = c("(0,3]", "(3,10]", "(10,30]", "(30,100]", "(100,Inf]"),
      class = "factor"
    )
  ),
  row.names = c(NA, -15L),
  class = c("tbl_df", "tbl", "data.frame")
)
使用专门为处理 NA 值而编写的 fct_explicit_na。
library(dplyr)
library(forcats)
# Count rows and total the responses per Units level, then turn the missing
# Units value into an explicit "No Response" level.
df %>%
  group_by(Units) %>%
  summarize(
    Frequency = n(),
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    Total = sum(Responses, na.rm = TRUE)
  ) %>%
  # fct_explicit_na() converts NA values of a factor into a named level.
  # Note: forcats >= 1.0.0 deprecates it in favour of
  # fct_na_value_to_level(Units, level = "No Response").
  mutate(Units = fct_explicit_na(Units, na_level = "No Response"))
# Units Frequency Total
#* <fct> <int> <dbl>
#1 (0,3] 4 8
#2 (3,10] 5 31
#3 (10,30] 2 38
#4 (100,Inf] 3 673
#5 No Response 1 0
您也可以先把新水平添加到因子的水平中,然后使用 replace 替换 NA 值。
# Register "No Response" as a valid level first: assigning a value that is
# not among a factor's levels yields NA (with a warning) instead of the label.
levels(df$Units) <- c(levels(df$Units), "No Response")

# Summarise per Units level, then overwrite the missing Units value with the
# newly added level.
df %>%
  group_by(Units) %>%
  summarize(
    Frequency = n(),
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    Total = sum(Responses, na.rm = TRUE)
  ) %>%
  mutate(Units = replace(Units, is.na(Units), "No Response"))
我正在尝试使用 tidyverse 中的 forcats 或 dplyr 包,将某个因子中的 NA 值重新编码为一个字符串水平。问题在于,我要更改的因子值本身就是 NA,所以运行时出现了警告,结果也没有改变。
我找到了这个问题 (R how to change one of the level to NA),但它讲的是把一个因子水平更改为 NA,而我想做的正好相反:把 NA 更改为其他值。
这是我尝试过的:
library(dplyr)
# Attempt 1 (dplyr): count rows and total the responses per Units level, then
# try to relabel the NA row as "No Response".
# NOTE(review): recode_factor() takes `old = new` pairs, so this call asks to
# turn a level named "No Response" into NA rather than the reverse; the NA row
# in the printed result that follows is left unchanged.
df %>%
group_by(Units) %>%
summarize(Frequency = n(),
Total = sum(Responses, na.rm = T)) %>%
mutate(Units = recode_factor(Units, "No Response" = NA_character_))
# A tibble: 5 x 3
Units Frequency Total
<fct> <int> <dbl>
1 (0,3] 4 8
2 (3,10] 5 31
3 (10,30] 2 38
4 (100,Inf] 3 673
5 NA 1 0
Warning messages:
1: Problem with `mutate()` input `Units`.
i Unknown levels in `f`: NA
i Input `Units` is `fct_recode(Units, `No Response` = NA_character_)`.
2: Unknown levels in `f`: NA
和
library(forcats)
# Attempt 2 (forcats): same per-level summary, relabelling with fct_recode().
# fct_recode() takes `new = "old"` pairs and only renames existing levels.
# NA is a missing value, not a level, which is why forcats reports
# "Unknown levels in `f`: NA" and the NA row stays as it was.
df %>%
group_by(Units) %>%
summarize(Frequency = n(),
Total = sum(Responses, na.rm = T)) %>%
mutate(Units = fct_recode(Units, "No Response" = NA_character_))
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 5 x 3
Units Frequency Total
<fct> <int> <dbl>
1 (0,3] 4 8
2 (3,10] 5 31
3 (10,30] 2 38
4 (100,Inf] 3 673
5 NA 1 0
示例数据:
# Example data: 15 rows with an ID, a numeric Responses value and a Units
# factor whose levels are interval labels. Row 12 is NA in both Responses
# and Units; the "(30,100]" level is declared but never used.
df <- structure(
  list(
    ID = c(
      "000002", "000008", "000009", "000018", "000021",
      "000033", "000045", "000051", "000064", "000067",
      "000070", "000072", "000074", "000088", "000112"
    ),
    Responses = c(18, 6, 300, 8, 7, 150, 6, 4, 2, 3, 20, NA, 223, 2, 1),
    Units = structure(
      c(3L, 2L, 5L, 2L, 2L, 5L, 2L, 2L, 1L, 1L, 3L, NA, 5L, 1L, 1L),
      levels = c("(0,3]", "(3,10]", "(10,30]", "(30,100]", "(100,Inf]"),
      class = "factor"
    )
  ),
  row.names = c(NA, -15L),
  class = c("tbl_df", "tbl", "data.frame")
)
使用专门为处理 NA 值而编写的 fct_explicit_na。
library(dplyr)
library(forcats)
# Count rows and total the responses per Units level, then turn the missing
# Units value into an explicit "No Response" level.
df %>%
  group_by(Units) %>%
  summarize(
    Frequency = n(),
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    Total = sum(Responses, na.rm = TRUE)
  ) %>%
  # fct_explicit_na() converts NA values of a factor into a named level.
  # Note: forcats >= 1.0.0 deprecates it in favour of
  # fct_na_value_to_level(Units, level = "No Response").
  mutate(Units = fct_explicit_na(Units, na_level = "No Response"))
# Units Frequency Total
#* <fct> <int> <dbl>
#1 (0,3] 4 8
#2 (3,10] 5 31
#3 (10,30] 2 38
#4 (100,Inf] 3 673
#5 No Response 1 0
您也可以先把新水平添加到因子的水平中,然后使用 replace 替换 NA 值。
# Register "No Response" as a valid level first: assigning a value that is
# not among a factor's levels yields NA (with a warning) instead of the label.
levels(df$Units) <- c(levels(df$Units), "No Response")

# Summarise per Units level, then overwrite the missing Units value with the
# newly added level.
df %>%
  group_by(Units) %>%
  summarize(
    Frequency = n(),
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    Total = sum(Responses, na.rm = TRUE)
  ) %>%
  mutate(Units = replace(Units, is.na(Units), "No Response"))