R 中的 5x5 气泡图
5x5 Bubble Chart in R
我正在尝试在 R Shiny 中重现类似于加州学校责任仪表板上使用的图形。为了这个例子,考虑以下数据框:
student <- c("1234", "4321", "5678", "8765")
schools <- c("ABC", "ABC", "XYZ", "XYZ")
DFS_20 <- c(-34.2, -1.5, 2.8, 8.9)
DFS_21 <- c(-13.5, 27.8, 5.4, 3.9)
school_data <- data.frame("student_id" = student, "school_id" = schools, "DFS_2020" = DFS_20, "DFS_2021" = DFS_21, "Delta_DFS" = DFS_21 - DFS_20)
我想以某种方式将这些数据绘制在这样的网格上:
您将在 [x, y] = [4, 1] 处有一个数据点(左下角为 (0,0) 代表学生 1234,因为他们的 DFS_2021 分数很低(- 13.5),但他们的同比增长显着增加 (20.7);学生 4321 在 [x, y] = [4, 3] 中的一个点,因为他们的 DFS_2021 分数高 (27.8) 并且他们的年份-年增长率显着增加 (29.3) 等。我想要一个气泡图,以便相对于每个单元格内的数据点数量增加点的大小,但我不知道从哪里开始创建 canvas(用颜色)覆盖数据点。我知道我可以将他们的分数转化为格点以绘制在 5x5 网格上,但是用颜色制作网格超出了我的技能范围。
简而言之,您可以将值重新编码为因子并计算数据集中的每个组合。有了这个新的 table(包含当前 DFS 级别、DFS 差异级别和每个类别中的学生人数),您可以轻松创建点图。
要对您的点进行颜色编码,您可能需要在 table 中添加一个带有颜色的额外列。因此我创建了一个元颜色 table(所有 DFS 组合和相关颜色)并加入了 tables.
代码
# load packages
library(tidyverse)
# create color table
df_col <- crossing(DFS_current_status = factor(c("very low", "low", "medium",
"high", "very high"),
levels = c("very high", "high",
"medium", "low", "very low")),
DFS_diff = factor(c("declined significantly", "declined",
"maintained" ,"increased",
"increased significantly"),
levels = c("declined significantly",
"declined", "maintained",
"increased", "increased significantly"))) %>%
add_column(color = c("green", "green", "blue", "blue","blue",
"green", "green", "green", "green", "blue",
"yellow", "yellow", "yellow", "green", "green",
"orange", "orange", "orange", "yellow", "yellow",
"red", "red", "red", "orange", "orange"))
# transform data
df <- school_data %>%
mutate(DFS_current_status = case_when(DFS_2021 >= 45 ~ "very high",
between(DFS_2021, 10, 44.9) ~ "high",
between(DFS_2021, -5, 9.9) ~ "medium",
between(DFS_2021, -70, -5.1) ~ "low",
DFS_2021 < -70 ~ "very low",
TRUE ~ NA_character_),
DFS_diff = case_when(Delta_DFS < -15 ~ "declined significantly",
between(Delta_DFS, -15, -3) ~ "declined",
between(Delta_DFS, -2.9, 2.9) ~ "maintained",
between(Delta_DFS, 3, 14.9) ~ "increased",
Delta_DFS >= 15 ~ "increased significantly",
TRUE ~ NA_character_)) %>%
count(DFS_current_status, DFS_diff) %>%
left_join(df_col) %>%
mutate(DFS_current_status = factor(DFS_current_status,
levels = rev(c("very high", "high",
"medium", "low", "very low"))),
DFS_diff = factor(DFS_diff,
levels = c("declined significantly",
"declined", "maintained",
"increased", "increased significantly")))
# create plot
p <- ggplot(df) +
geom_point(aes(x = DFS_diff,
y = DFS_current_status,
size = n,
color = color)) +
scale_y_discrete(drop = F) +
scale_x_discrete(drop = F, position = "top") +
scale_color_identity()
# display plot in plotly
ggplotly(p) %>%
layout(xaxis = list(side ="top"))
情节
编辑:评论 - 为网格而不是点着色
df_col %>%
ggplot() +
geom_raster(aes(x = DFS_diff,
y = rev(DFS_current_status),
fill= color)) +
scale_fill_identity() +
scale_x_discrete(position = "top") +
geom_point(data = df, aes(x = DFS_diff,
y = DFS_current_status,
size = n))
我正在尝试在 R Shiny 中重现类似于加州学校责任仪表板上使用的图形。为了这个例子,考虑以下数据框:
student <- c("1234", "4321", "5678", "8765")
schools <- c("ABC", "ABC", "XYZ", "XYZ")
DFS_20 <- c(-34.2, -1.5, 2.8, 8.9)
DFS_21 <- c(-13.5, 27.8, 5.4, 3.9)
school_data <- data.frame("student_id" = student, "school_id" = schools, "DFS_2020" = DFS_20, "DFS_2021" = DFS_21, "Delta_DFS" = DFS_21 - DFS_20)
我想以某种方式将这些数据绘制在这样的网格上:
您将在 [x, y] = [4, 1] 处有一个数据点(左下角为 (0,0) 代表学生 1234,因为他们的 DFS_2021 分数很低(- 13.5),但他们的同比增长显着增加 (20.7);学生 4321 在 [x, y] = [4, 3] 中的一个点,因为他们的 DFS_2021 分数高 (27.8) 并且他们的年份-年增长率显着增加 (29.3) 等。我想要一个气泡图,以便相对于每个单元格内的数据点数量增加点的大小,但我不知道从哪里开始创建 canvas(用颜色)覆盖数据点。我知道我可以将他们的分数转化为格点以绘制在 5x5 网格上,但是用颜色制作网格超出了我的技能范围。
简而言之,您可以将值重新编码为因子并计算数据集中的每个组合。有了这个新的 table(包含当前 DFS 级别、DFS 差异级别和每个类别中的学生人数),您可以轻松创建点图。
要对您的点进行颜色编码,您可能需要在 table 中添加一个带有颜色的额外列。因此我创建了一个元颜色 table(所有 DFS 组合和相关颜色)并加入了 tables.
代码
# load packages
library(tidyverse)
# create color table
df_col <- crossing(DFS_current_status = factor(c("very low", "low", "medium",
"high", "very high"),
levels = c("very high", "high",
"medium", "low", "very low")),
DFS_diff = factor(c("declined significantly", "declined",
"maintained" ,"increased",
"increased significantly"),
levels = c("declined significantly",
"declined", "maintained",
"increased", "increased significantly"))) %>%
add_column(color = c("green", "green", "blue", "blue","blue",
"green", "green", "green", "green", "blue",
"yellow", "yellow", "yellow", "green", "green",
"orange", "orange", "orange", "yellow", "yellow",
"red", "red", "red", "orange", "orange"))
# transform data
df <- school_data %>%
mutate(DFS_current_status = case_when(DFS_2021 >= 45 ~ "very high",
between(DFS_2021, 10, 44.9) ~ "high",
between(DFS_2021, -5, 9.9) ~ "medium",
between(DFS_2021, -70, -5.1) ~ "low",
DFS_2021 < -70 ~ "very low",
TRUE ~ NA_character_),
DFS_diff = case_when(Delta_DFS < -15 ~ "declined significantly",
between(Delta_DFS, -15, -3) ~ "declined",
between(Delta_DFS, -2.9, 2.9) ~ "maintained",
between(Delta_DFS, 3, 14.9) ~ "increased",
Delta_DFS >= 15 ~ "increased significantly",
TRUE ~ NA_character_)) %>%
count(DFS_current_status, DFS_diff) %>%
left_join(df_col) %>%
mutate(DFS_current_status = factor(DFS_current_status,
levels = rev(c("very high", "high",
"medium", "low", "very low"))),
DFS_diff = factor(DFS_diff,
levels = c("declined significantly",
"declined", "maintained",
"increased", "increased significantly")))
# create plot
p <- ggplot(df) +
geom_point(aes(x = DFS_diff,
y = DFS_current_status,
size = n,
color = color)) +
scale_y_discrete(drop = F) +
scale_x_discrete(drop = F, position = "top") +
scale_color_identity()
# display plot in plotly
ggplotly(p) %>%
layout(xaxis = list(side ="top"))
情节
编辑:评论 - 为网格而不是点着色
df_col %>%
ggplot() +
geom_raster(aes(x = DFS_diff,
y = rev(DFS_current_status),
fill= color)) +
scale_fill_identity() +
scale_x_discrete(position = "top") +
geom_point(data = df, aes(x = DFS_diff,
y = DFS_current_status,
size = n))