R: `mapped_discrete` 对象只能从数字向量创建

R: `mapped_discrete` objects can only be created from numeric vectors

我在 R 中有以下数据:

# Example data: a data.frame with 20 rows and 10 numeric columns (t0 to t9).
# NA marks a missing cell; these NAs are what the plots below visualise.
df <- structure(list(t0 = c(3.82, -4.88, NA, -3.83, -3.99, NA, NA, 
NA, 6.35, 2.47, 0.28, 0.3, NA, 8.31, NA, NA, NA, 2.76, NA, 1.38
), t1 = c(NA, NA, NA, NA, NA, NA, -1.23, 2.19, 4.13, 3.49, -0.42, 
NA, 3.78, 2.7, 1.17, NA, NA, NA, NA, NA), t2 = c(-1.85, NA, 1.46, 
0.17, NA, NA, -2.81, 1.75, NA, 2.32, -3.08, -1.39, NA, 7.53, 
1.77, NA, 0.1, NA, NA, -2.61), t3 = c(-2.05, 3.73, -2.04, -0.22, 
-4.29, NA, NA, -0.11, 0.43, NA, -0.78, 3.24, NA, NA, -1.13, 1.09, 
NA, NA, 2.7, NA), t4 = c(1.01, -2.77, NA, -3.05, -2.33, 3.78, 
NA, NA, NA, NA, -2.04, -4.01, -2.32, 4, -0.28, NA, NA, 9.04, 
NA, -4.12), t5 = c(1.56, NA, 4.89, NA, NA, NA, NA, NA, 0.88, 
3.15, NA, NA, 2.59, NA, 2.04, NA, NA, NA, -0.26, NA), t6 = c(0.34, 
-0.99, NA, 1.93, NA, NA, NA, NA, 0.35, NA, -6.46, NA, NA, NA, 
2.57, NA, NA, 4.89, NA, -5.63), t7 = c(0.52, NA, 0.5, 1.85, -6.23, 
NA, NA, 1.59, 7.82, 0.82, NA, NA, -1.77, NA, NA, NA, 2.01, NA, 
0.7, -1.55), t8 = c(NA, NA, 4.9, -3.93, -8.13, 3.14, 0.03, 1.67, 
3.55, NA, -1.55, 2.57, -0.87, NA, 0.71, -0.1, NA, NA, 2.04, NA
), t9 = c(-1.09, NA, -0.52, NA, NA, NA, NA, NA, NA, 2.05, -5.21, 
-0.89, -0.03, NA, 0.66, 3.72, -1.96, NA, NA, NA)), row.names = c(NA, 
20L), class = "data.frame")

使用以下教程 (https://jenslaufer.com/data/analysis/visualize_missing_values_with_ggplot.html),我正在尝试制作显示缺失数据百分比的可视化:

library(dplyr)
library(ggplot2)
library(tidyverse)

# Builds a raster "missingness map": one tile per (variable, row) cell, with
# the fill colour showing whether that cell is NA.
row.plot <- df %>%
  mutate(id = row_number()) %>%
  # Reshape to long format: one (id, key, val) record per original cell.
  gather(-id, key = "key", value = "val") %>%
  mutate(isna = is.na(val)) %>%
  ggplot(aes(key, id, fill = isna)) +
    geom_raster(alpha=0.8) +
    scale_fill_manual(name = "",
        values = c('steelblue', 'tomato3'),
        labels = c("Present", "Missing")) +
    # NOTE(review): no `levels` object is defined before this point in the
    # snippet, so the name presumably resolves to the base R function
    # `levels()` instead of a vector of variable names -- confirm; this looks
    # like the source of the reported `mapped_discrete` error.
    scale_x_discrete(limits = levels) +
    labs(x = "Variable",
           y = "Row Number", title = "Missing values in rows") +
    coord_flip()

当我尝试查看结果时,这是我得到的错误:

row.plot

Error in `new_mapped_discrete()`:
! `mapped_discrete` objects can only be created from numeric vectors
Run `rlang::last_error()` to see where the error occurred.
Warning messages:
1: In structure(in_domain, pos = match(in_domain, breaks)) :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
2: In structure(in_domain, pos = match(in_domain, breaks)) :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
3: Removed 200 rows containing missing values (geom_raster). 

我的问题:有人可以告诉我我做错了什么吗?我该如何解决这个错误?最后想得到这样一张图:

谢谢!

您似乎想要按每一行(而不是按每个变量)绘制缺失数据的百分比图(不过这两种做法我在下面都给出了)。主要问题是没有定义 `levels`,所以我们可以在这里先创建它,然后将其转换为因子(factor)再传给 `scale_x_discrete`。

library(tidyverse)

# Per-row missingness summary: for every row id, count the present and the
# missing cells and express each count as a percentage of that row's cells.
output <- df %>%
  mutate(id = row_number()) %>%
  pivot_longer(-id, names_to = "key", values_to = "val") %>%
  select(-key) %>%
  group_by(id) %>%
  mutate(isna = is.na(val),
         total = n()) %>%
  group_by(id, total, isna) %>%
  # Drop the grouping explicitly: this avoids the summarise() regrouping
  # message and leaves `output` as a plain, ungrouped tibble.
  summarise(num.isna = n(), .groups = "drop") %>%
  mutate(pct = num.isna / total * 100)

# Row ids ordered from the highest to the lowest share of missing cells.
# `isna` is already logical, so it is used directly as the filter condition
# instead of being compared against `T` (which is an ordinary, reassignable
# variable).
# NOTE: an id without any missing cell has no `isna == TRUE` record, so it is
# absent from `levels` and therefore from the axis limits set below.
levels <- output %>%
  filter(isna) %>%
  arrange(desc(pct)) %>%
  pull(id)

# Stacked bar per row: present vs. missing percentage.
row.plot <- output %>%
  ggplot() +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(
    aes(x = reorder(id, desc(pct)), y = pct, fill = isna),
    alpha = 0.8
  ) +
  # The ids are numeric, so they are converted to a factor before being used
  # as limits of the discrete x scale.
  scale_x_discrete(limits = factor(levels)) +
  scale_fill_manual(
    name = "",
    values = c("steelblue", "tomato3"),
    labels = c("Present", "Missing")
  ) +
  coord_flip() +
  labs(
    title = "Percentage of missing values",
    x = "Row Number",
    y = "% of missing values"
  )

输出

或者如果你想通过变量来做,那么:

# Per-variable missingness summary: for every column, count the present and
# the missing cells and express each count as a percentage of the column.
output <- df %>%
  pivot_longer(everything(), names_to = "key", values_to = "val") %>%
  group_by(key) %>%
  mutate(isna = is.na(val),
         total = n()) %>%
  group_by(key, total, isna) %>%
  # Drop the grouping explicitly: this avoids the summarise() regrouping
  # message and leaves `output` as a plain, ungrouped tibble.
  summarise(num.isna = n(), .groups = "drop") %>%
  mutate(pct = num.isna / total * 100)

# Variable names ordered from the highest to the lowest share of missing
# cells. `isna` is already logical, so it is used directly as the filter
# condition instead of being compared against `T` (which is an ordinary,
# reassignable variable).
# NOTE: a variable without any missing cell has no `isna == TRUE` record, so
# it is absent from `levels` and therefore from the axis limits set below.
levels <- output %>%
  filter(isna) %>%
  arrange(desc(pct)) %>%
  pull(key)

# Stacked bar per variable: present vs. missing percentage.
row.plot <- output %>%
  ggplot() +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(
    aes(x = reorder(key, desc(pct)), y = pct, fill = isna),
    alpha = 0.8
  ) +
  scale_x_discrete(limits = levels) +
  scale_fill_manual(
    name = "",
    values = c("steelblue", "tomato3"),
    labels = c("Present", "Missing")
  ) +
  coord_flip() +
  labs(
    title = "Percentage of missing values",
    x = "Variable",
    y = "% of missing values"
  )

输出

错误是由 `scale_x_discrete` 引起的。
您不需要它,因为在您的示例中 id 是数值型,不像因子(factor)那样带有 levels:

# Missingness map of the raw data: one tile per (variable, row) cell, with
# the fill colour showing whether that cell is NA.
df %>%
  mutate(id = row_number()) %>%
  # pivot_longer() replaces the superseded gather(); it yields the same
  # (id, key, val) long format.
  pivot_longer(-id, names_to = "key", values_to = "val") %>%
  mutate(isna = is.na(val)) %>%
  ggplot(aes(key, id, fill = isna)) +
  geom_raster(alpha = 0.8) +
  scale_fill_manual(
    name = "",
    values = c("steelblue", "tomato3"),
    labels = c("Present", "Missing")
  ) +
  # scale_x_discrete(limits = levels) is intentionally left out: the plot does
  # not need it, and no `levels` vector is defined in this snippet.
  labs(x = "Variable", y = "Row Number", title = "Missing values in rows") +
  coord_flip()

当我运行你教程中的代码和你的数据时,没有错误。也许你想要这样的东西:

library(tidyverse)
# Per-variable missingness summary: for every column, count the present and
# the missing cells and express each count as a percentage of the column.
missing.values <- df %>%
  # pivot_longer() replaces the superseded gather(); all columns are stacked
  # into (key, val) pairs.
  pivot_longer(everything(), names_to = "key", values_to = "val") %>%
  mutate(isna = is.na(val)) %>%
  group_by(key) %>%
  mutate(total = n()) %>%
  group_by(key, total, isna) %>%
  # Drop the grouping explicitly: this avoids the summarise() regrouping
  # message and leaves `missing.values` as a plain, ungrouped tibble.
  summarise(num.isna = n(), .groups = "drop") %>%
  mutate(pct = num.isna / total * 100)

# Variable names ordered from the highest to the lowest share of missing
# cells. `isna` is already logical, so it is used directly as the filter
# condition instead of being compared against `T` (which is an ordinary,
# reassignable variable).
# NOTE: a variable without any missing cell has no `isna == TRUE` record, so
# it is absent from `levels` and therefore from the axis limits set below.
levels <- missing.values %>%
  filter(isna) %>%
  arrange(desc(pct)) %>%
  pull(key)

# Full-width stacked bar per variable: present vs. missing percentage.
percentage.plot <- missing.values %>%
  ggplot() +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(
    aes(x = reorder(key, desc(pct)), y = pct, fill = isna),
    alpha = 0.8,
    width = 1
  ) +
  scale_x_discrete(limits = levels) +
  scale_fill_manual(
    name = "",
    values = c("goldenrod3", "firebrick3"),
    labels = c("Present", "Missing")
  ) +
  coord_flip() +
  labs(
    title = "Percentage of missing values",
    x = "Variable",
    y = "% of missing values"
  ) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        panel.border = element_blank())

输出: