R:创建新列,按均值和标准差划分 hi/mid/low 三个分箱
R: Creating new column to represent hi/mid/low bins by mean and standard deviation
我有一批调查数据,希望能按其中几个采用 0–10 量表的特定列(例如"请将您对 x 的态度按 0 到 10 打分")对数据进行划分,以便使用 ggplot() + facet_grid 绘图。分面将使用 hi/med/low 三个分箱,以均值加减 1 个标准差为界:高于均值 +1 个标准差为 hi,低于均值 −1 个标准差为 low,其余为 med。我现有的可运行代码会把整个数据框拆成 3 个部分,如下所示:
# Generate sample data and assign it, so the code below has an object to use:
climate_experience_data <- structure(
  list(
    Q4 = c(2, 3, 3, 5, 4, 3),
    Q5 = c(1, 3, 3, 3, 2, 2),
    Q6 = c(4, 3, 3, 3, 4, 4),
    Q7 = c(4, 2, 3, 5, 5, 5),
    Q53_1 = c(5, 8, 4, 5, 4, 5)
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Acquire Q53_1 data as a factor on the 0-10 scale
# (responses that are not one of the listed levels become NA).
political_scale <- factor(climate_experience_data$Q53_1, levels = 0:10)

# Recover the real scale values from the factor labels. Calling as.numeric()
# directly on a factor returns the integer level codes (1-11 for levels 0-10),
# which would shift both thresholds up by one.
political_values <- as.numeric(as.character(political_scale))

# Generate thresholds one standard deviation either side of the mean.
# na.rm must be given to mean() and sd() themselves to skip missing responses.
scale_mean <- mean(political_values, na.rm = TRUE)
scale_sd <- sd(political_values, na.rm = TRUE)
low_threshold <- round(scale_mean - scale_sd, digits = 0)
high_threshold <- round(scale_mean + scale_sd, digits = 0)

# Generate low/med/high bins based on mean and SD.
# dplyr::filter is namespaced so stats::filter is never picked up by accident.
political_lr_low <- dplyr::filter(
  climate_experience_data,
  Q53_1 <= low_threshold
)
political_lr_mid <- dplyr::filter(
  climate_experience_data,
  Q53_1 < high_threshold & Q53_1 > low_threshold
)
political_lr_high <- dplyr::filter(
  climate_experience_data,
  Q53_1 >= high_threshold
)
我意识到这种方法确实不适合分面。我怀疑需要组合使用 mutate()、across()、where() 和 group_by(),向数据中添加一个新列 Q53_scale,并根据 Q53_1 的值相对于上述 low/high 阈值所处的位置(例如,高于均值 +1 个标准差,或低于均值 −1 个标准差)填入 "hi"、"med" 或 "low"。我的前几十次尝试都失败了——有没有人成功用 sd() 以这种方式对数据进行分类?
library(tidyverse)
# Sample survey data; Q53_1 is the column the bins are derived from.
climate_experience_data <- structure(
  list(
    Q4 = c(2, 3, 3, 5, 4, 3),
    Q5 = c(1, 3, 3, 3, 2, 2),
    Q6 = c(4, 3, 3, 3, 4, 4),
    Q7 = c(4, 2, 3, 5, 5, 5),
    Q53_1 = c(5, 8, 4, 5, 4, 5)
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Label every row low/medium/high relative to mean +/- 1 SD of Q53_1, stored
# as an ordered-by-level factor, then facet the Q4 vs Q5 scatter plot by it.
climate_experience_data %>%
  mutate(
    bin = case_when(
      # Keep missing responses missing; otherwise they would fall through to
      # the catch-all branch and be labelled "medium".
      is.na(Q53_1) ~ NA_character_,
      # na.rm = TRUE so one missing response does not turn every comparison
      # into NA (which would label all rows "medium").
      Q53_1 > mean(Q53_1, na.rm = TRUE) + sd(Q53_1, na.rm = TRUE) ~ "high",
      Q53_1 < mean(Q53_1, na.rm = TRUE) - sd(Q53_1, na.rm = TRUE) ~ "low",
      TRUE ~ "medium"
    ) %>% factor(levels = c("low", "medium", "high"))
  ) %>%
  ggplot(aes(Q4, Q5)) +
  geom_point() +
  facet_grid(~bin)
由 reprex package (v2.0.0) 于 2022-03-10 创建
我有一批调查数据,希望能按其中几个采用 0–10 量表的特定列(例如"请将您对 x 的态度按 0 到 10 打分")对数据进行划分,以便使用 ggplot() + facet_grid 绘图。分面将使用 hi/med/low 三个分箱,以均值加减 1 个标准差为界:高于均值 +1 个标准差为 hi,低于均值 −1 个标准差为 low,其余为 med。我现有的可运行代码会把整个数据框拆成 3 个部分,如下所示:
# Generate sample data and assign it, so the code below has an object to use:
climate_experience_data <- structure(
  list(
    Q4 = c(2, 3, 3, 5, 4, 3),
    Q5 = c(1, 3, 3, 3, 2, 2),
    Q6 = c(4, 3, 3, 3, 4, 4),
    Q7 = c(4, 2, 3, 5, 5, 5),
    Q53_1 = c(5, 8, 4, 5, 4, 5)
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Acquire Q53_1 data as a factor on the 0-10 scale
# (responses that are not one of the listed levels become NA).
political_scale <- factor(climate_experience_data$Q53_1, levels = 0:10)

# Recover the real scale values from the factor labels. Calling as.numeric()
# directly on a factor returns the integer level codes (1-11 for levels 0-10),
# which would shift both thresholds up by one.
political_values <- as.numeric(as.character(political_scale))

# Generate thresholds one standard deviation either side of the mean.
# na.rm must be given to mean() and sd() themselves to skip missing responses.
scale_mean <- mean(political_values, na.rm = TRUE)
scale_sd <- sd(political_values, na.rm = TRUE)
low_threshold <- round(scale_mean - scale_sd, digits = 0)
high_threshold <- round(scale_mean + scale_sd, digits = 0)

# Generate low/med/high bins based on mean and SD.
# dplyr::filter is namespaced so stats::filter is never picked up by accident.
political_lr_low <- dplyr::filter(
  climate_experience_data,
  Q53_1 <= low_threshold
)
political_lr_mid <- dplyr::filter(
  climate_experience_data,
  Q53_1 < high_threshold & Q53_1 > low_threshold
)
political_lr_high <- dplyr::filter(
  climate_experience_data,
  Q53_1 >= high_threshold
)
我意识到这种方法确实不适合分面。我怀疑需要组合使用 mutate()、across()、where() 和 group_by(),向数据中添加一个新列 Q53_scale,并根据 Q53_1 的值相对于上述 low/high 阈值所处的位置(例如,高于均值 +1 个标准差,或低于均值 −1 个标准差)填入 "hi"、"med" 或 "low"。我的前几十次尝试都失败了——有没有人成功用 sd() 以这种方式对数据进行分类?
library(tidyverse)
# Sample survey data; Q53_1 is the column the bins are derived from.
climate_experience_data <- structure(
  list(
    Q4 = c(2, 3, 3, 5, 4, 3),
    Q5 = c(1, 3, 3, 3, 2, 2),
    Q6 = c(4, 3, 3, 3, 4, 4),
    Q7 = c(4, 2, 3, 5, 5, 5),
    Q53_1 = c(5, 8, 4, 5, 4, 5)
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Label every row low/medium/high relative to mean +/- 1 SD of Q53_1, stored
# as an ordered-by-level factor, then facet the Q4 vs Q5 scatter plot by it.
climate_experience_data %>%
  mutate(
    bin = case_when(
      # Keep missing responses missing; otherwise they would fall through to
      # the catch-all branch and be labelled "medium".
      is.na(Q53_1) ~ NA_character_,
      # na.rm = TRUE so one missing response does not turn every comparison
      # into NA (which would label all rows "medium").
      Q53_1 > mean(Q53_1, na.rm = TRUE) + sd(Q53_1, na.rm = TRUE) ~ "high",
      Q53_1 < mean(Q53_1, na.rm = TRUE) - sd(Q53_1, na.rm = TRUE) ~ "low",
      TRUE ~ "medium"
    ) %>% factor(levels = c("low", "medium", "high"))
  ) %>%
  ggplot(aes(Q4, Q5)) +
  geom_point() +
  facet_grid(~bin)
由 reprex package (v2.0.0) 于 2022-03-10 创建