r - 使用循环为多对变量创建单向方差分析、汇总统计和绘图
r - Create One-way ANOVAs, summary statistics and plots for multiple pairs of variables using loop
我是新来的,对编程很陌生,所以任何帮助将不胜感激。
我有一个数据框 df1,它看起来像这样:
Picture
Emotion
Gender
Type
Trial
Attr_scores
Fear_scores
Appr_scores
Avoid_scores
1
happy
male
human
first
11
3
21
21
2
sad
male
human
first
12
6
22
22
3
neutral
male
human
first
13
2
23
23
4
happy
male
cartoon
first
14
3
24
24
5
sad
male
cartoon
first
15
6
25
25
6
neutral
male
cartoon
first
16
2
26
26
7
happy
male
animal
first
17
3
27
27
8
sad
male
animal
first
18
6
28
28
9
neutral
male
animal
first
19
2
29
29
10
happy
female
human
first
20
3
21
30
11
sad
female
human
first
21
6
22
31
12
neutral
female
human
first
22
2
23
32
13
happy
female
cartoon
first
23
3
24
33
14
sad
female
cartoon
first
24
6
25
34
15
neutral
female
cartoon
first
25
2
26
35
16
happy
female
animal
first
26
3
27
36
17
sad
female
animal
first
27
6
28
37
18
neutral
female
animal
first
28
2
29
38
这是生成它的代码:
Picture <- c(1:18)
Emotion <- rep(c('happy','sad','neutral'),times=6)
Gender <- rep(c('male','female'),each=9)
Type <- rep(c('human','cartoon','animal','human','cartoon','animal'),each=3)
Trial <- rep(c('first'),times=18)
Attr_scores <- c(11:28)
Fear_scores <- rep(c(3,6,2),times=6)
Appr_scores <- rep(c(21:29),times=2)
Avoid_scores <- c(21:38)
df1<-data.frame(Picture,Emotion,Gender,Type,Trial,Attr_scores,Fear_scores,Appr_scores,Avoid_scores)
我需要取几对变量(一个自变量 + 一个因变量,例如情绪 + Attr_scores、情绪 + Fear_scores、性别 + Attr_scores、性别 + Avoid_scores),并且对于它们中的每一个:1) 运行 汇总统计(比较均值和标准差),2) 运行 单向方差分析,3) 创建一个散点图。
到目前为止,我已经为第一对变量(性别 + Attr_scores)创建了代码。这是代码:
# Summary Statistics
library(dplyr)
group_by(df1, Gender) %>%
summarise(
N = n(),
Mean = mean(Attr_scores, na.rm = TRUE),
Sd = sd(Attr_scores, na.rm = TRUE)
)
# ANOVA
res.aov <- aov(Attr_scores ~ Gender, data = df1)
summary(res.aov)
#Plot
gender_attr_plot <- ggplot(df1, aes(x=Gender, y=Attr_scores)) +
geom_jitter(position=position_jitter(0.2))+
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 1),
geom="pointrange", color="red")
ggsave("gender_attr_plot.png", gender_attr_plot, width = 1600, height = 900, units = "px")
我可以为每一对额外的变量复制粘贴代码并每次手动更改变量名称,但这似乎是一种非常低效的做事方式。此外,如果我需要 运行 对任何额外的变量对进行相同的分析,我将不得不再次复制整个代码才能做到这一点。
我想做的是创建一个 table 或带有变量对的嵌套列表(如果需要额外的变量对,以后可以轻松更新)并编写一个循环这些变量对并为每个变量执行所有 3 个操作(汇总统计、方差分析和绘图)。
我认为它应该看起来像这样(这与实际的工作代码相去甚远,它只是给出一个大概的想法):
variables <- list(
c(Gender, Attr_scores),
c(Gender, Fear_scores),
c(Type, Appr_scores),
c(Emotion, Avoid_scores))
for(i in variables){
library(dplyr)
group_by(df1, variables,'[[',1) %>%
summarise(
N = n(),
Mean = mean(variables,'[[',2, na.rm = TRUE),
Sd = sd(variables,'[[',2, na.rm = TRUE)
)
res.aov <- aov(variables,'[[',2 ~ variables,'[[',1, data = df1)
summary(res.aov)
plot <- ggplot(df1, aes(x=variables,'[[',1, y=variables,'[[',2)) +
geom_jitter(position=position_jitter(0.2))+
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 1),
geom="pointrange", color="red")
ggsave("??????.png", plot, width = 1600, height = 900, units = "px")
}
显然,这是行不通的,我一直在互联网上搜索解决方案,但我对 R 的了解还不足以弄清楚如何让它工作。任何帮助将不胜感激!
这可能有用
https://r4ds.had.co.nz/iteration.html#the-map-functions
https://aosmith.rbind.io/2018/08/20/automating-exploratory-plots/
variables <-
structure(list(
x = c("Gender", "Gender", "Type", "Emotion"),
y = c("Attr_scores", "Fear_scores", "Appr_scores", "Avoid_scores")
),
class = "data.frame",
row.names = c(NA,-4L))
variables
#> x y
#> 1 Gender Attr_scores
#> 2 Gender Fear_scores
#> 3 Type Appr_scores
#> 4 Emotion Avoid_scores
library(tidyverse)
# GROUP
map2(
.x = variables$x,
.y = variables$y,
.f = ~ group_by(df,!!sym(.x)) %>%
summarise(
N = n(),
Mean = mean(!!sym(.y), na.rm = TRUE),
Sd = sd(!!sym(.y), na.rm = TRUE)
)) %>%
set_names(nm = str_c(variables$x, variables$y, sep = "#"))
#> $`Gender#Attr_scores`
#> # A tibble: 2 x 4
#> Gender N Mean Sd
#> <chr> <int> <dbl> <dbl>
#> 1 female 9 24 2.74
#> 2 male 9 15 2.74
#>
#> $`Gender#Fear_scores`
#> # A tibble: 2 x 4
#> Gender N Mean Sd
#> <chr> <int> <dbl> <dbl>
#> 1 female 9 3.67 1.80
#> 2 male 9 3.67 1.80
#>
#> $`Type#Appr_scores`
#> # A tibble: 3 x 4
#> Type N Mean Sd
#> <chr> <int> <dbl> <dbl>
#> 1 animal 6 28 0.894
#> 2 cartoon 6 25 0.894
#> 3 human 6 22 0.894
#>
#> $`Emotion#Avoid_scores`
#> # A tibble: 3 x 4
#> Emotion N Mean Sd
#> <chr> <int> <dbl> <dbl>
#> 1 happy 6 28.5 5.61
#> 2 neutral 6 30.5 5.61
#> 3 sad 6 29.5 5.61
# ANOVA
map2(
.x = variables$x,
.y = variables$y,
.f = ~ aov(as.formula(str_c(.y, .x, sep = "~")), data = df)
) %>%
set_names(nm = str_c(variables$x, variables$y, sep = "#"))
#> $`Gender#Attr_scores`
#> Call:
#> aov(formula = as.formula(str_c(.y, .x, sep = "~")), data = df)
#>
#> Terms:
#> Gender Residuals
#> Sum of Squares 364.5 120.0
#> Deg. of Freedom 1 16
#>
#> Residual standard error: 2.738613
#> Estimated effects may be unbalanced
#>
#> $`Gender#Fear_scores`
#> Call:
#> aov(formula = as.formula(str_c(.y, .x, sep = "~")), data = df)
#>
#> Terms:
#> Gender Residuals
#> Sum of Squares 0 52
#> Deg. of Freedom 1 16
#>
#> Residual standard error: 1.802776
#> Estimated effects may be unbalanced
#>
#> $`Type#Appr_scores`
#> Call:
#> aov(formula = as.formula(str_c(.y, .x, sep = "~")), data = df)
#>
#> Terms:
#> Type Residuals
#> Sum of Squares 108 12
#> Deg. of Freedom 2 15
#>
#> Residual standard error: 0.8944272
#> Estimated effects may be unbalanced
#>
#> $`Emotion#Avoid_scores`
#> Call:
#> aov(formula = as.formula(str_c(.y, .x, sep = "~")), data = df)
#>
#> Terms:
#> Emotion Residuals
#> Sum of Squares 12.0 472.5
#> Deg. of Freedom 2 15
#>
#> Residual standard error: 5.612486
#> Estimated effects may be unbalanced
#PLOT
f <- function(x, y) {
gender_attr_plot <- ggplot(df, aes(x = .data[[x]], y = .data[[y]])) +
geom_jitter(position = position_jitter(0.2)) +
stat_summary(
fun.data = mean_sdl,
fun.args = list(mult = 1),
geom = "pointrange",
color = "red"
)
}
all_plots <- map2(.x = variables$x, .y = variables$y, .f = f)
plotnames <- str_c(variables$x, "#", variables$y, ".png")
walk2(
.x = plotnames,
.y = all_plots,
.f = ~ ggsave(
filename = .x,
plot = .y,
width = 1600,
height = 900,
units = "px"
)
)
由 reprex package (v2.0.1)
于 2021-10-25 创建
数据
Picture <- c(1:18)
Emotion <- rep(c('happy', 'sad', 'neutral'), times = 6)
Gender <- rep(c('male', 'female'), each = 9)
Type <-
rep(c('human', 'cartoon', 'animal', 'human', 'cartoon', 'animal'),
each = 3)
Trial <- rep(c('first'), times = 18)
Attr_scores <- c(11:28)
Fear_scores <- rep(c(3, 6, 2), times = 6)
Appr_scores <- rep(c(21:29), times = 2)
Avoid_scores <- c(21:38)
df <-
data.frame(
Picture,
Emotion,
Gender,
Type,
Trial,
Attr_scores,
Fear_scores,
Appr_scores,
Avoid_scores
)
以下是您的任务的可能解决方案:
我稍微修改了您的代码并创建了一个函数 my_function
,使用此函数您可以获得一对数据集的所需输出。结果是列表中的return!
library(dplyr)
library(ggplot2)
my_function <- function(df, x, y) {
# Summary
a <- group_by(df, {{x}}) %>%
summarise(
N = n(),
Mean = mean({{y}}, na.rm = TRUE),
Sd = sd({{y}}, na.rm = TRUE)
)
# ANOVA
res.aov <- aov({{y}} ~ {{x}}, data = df)
b <- summary(res.aov)
# Plot
c <- ggplot(df1, aes(x={{x}}, y={{y}})) +
geom_jitter(position=position_jitter(0.2))+
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 1),
geom="pointrange", color="red")
ggsave(paste0(deparse(substitute(x)), "_",
deparse(substitute(y)), ".png"), width = 1600, height = 900, units = "px")
output<-list(a,b,c)
return(output)
}
# cases 1 - 4
my_function(df1, Gender, Attr_scores)
my_function(df1, Gender, Avoid_scores)
my_function(df1, Emotion, Attr_scores)
my_function(df1, Emotion, Fear_scores)
我是新来的,对编程很陌生,所以任何帮助将不胜感激。
我有一个数据框 df1,它看起来像这样:
Picture | Emotion | Gender | Type | Trial | Attr_scores | Fear_scores | Appr_scores | Avoid_scores |
---|---|---|---|---|---|---|---|---|
1 | happy | male | human | first | 11 | 3 | 21 | 21 |
2 | sad | male | human | first | 12 | 6 | 22 | 22 |
3 | neutral | male | human | first | 13 | 2 | 23 | 23 |
4 | happy | male | cartoon | first | 14 | 3 | 24 | 24 |
5 | sad | male | cartoon | first | 15 | 6 | 25 | 25 |
6 | neutral | male | cartoon | first | 16 | 2 | 26 | 26 |
7 | happy | male | animal | first | 17 | 3 | 27 | 27 |
8 | sad | male | animal | first | 18 | 6 | 28 | 28 |
9 | neutral | male | animal | first | 19 | 2 | 29 | 29 |
10 | happy | female | human | first | 20 | 3 | 21 | 30 |
11 | sad | female | human | first | 21 | 6 | 22 | 31 |
12 | neutral | female | human | first | 22 | 2 | 23 | 32 |
13 | happy | female | cartoon | first | 23 | 3 | 24 | 33 |
14 | sad | female | cartoon | first | 24 | 6 | 25 | 34 |
15 | neutral | female | cartoon | first | 25 | 2 | 26 | 35 |
16 | happy | female | animal | first | 26 | 3 | 27 | 36 |
17 | sad | female | animal | first | 27 | 6 | 28 | 37 |
18 | neutral | female | animal | first | 28 | 2 | 29 | 38 |
这是生成它的代码:
Picture <- c(1:18)
Emotion <- rep(c('happy','sad','neutral'),times=6)
Gender <- rep(c('male','female'),each=9)
Type <- rep(c('human','cartoon','animal','human','cartoon','animal'),each=3)
Trial <- rep(c('first'),times=18)
Attr_scores <- c(11:28)
Fear_scores <- rep(c(3,6,2),times=6)
Appr_scores <- rep(c(21:29),times=2)
Avoid_scores <- c(21:38)
df1<-data.frame(Picture,Emotion,Gender,Type,Trial,Attr_scores,Fear_scores,Appr_scores,Avoid_scores)
我需要取几对变量(一个自变量 + 一个因变量,例如情绪 + Attr_scores、情绪 + Fear_scores、性别 + Attr_scores、性别 + Avoid_scores),并且对于它们中的每一个:1) 运行 汇总统计(比较均值和标准差),2) 运行 单向方差分析,3) 创建一个散点图。
到目前为止,我已经为第一对变量(性别 + Attr_scores)创建了代码。这是代码:
# Summary Statistics
library(dplyr)
group_by(df1, Gender) %>%
summarise(
N = n(),
Mean = mean(Attr_scores, na.rm = TRUE),
Sd = sd(Attr_scores, na.rm = TRUE)
)
# ANOVA
res.aov <- aov(Attr_scores ~ Gender, data = df1)
summary(res.aov)
#Plot
gender_attr_plot <- ggplot(df1, aes(x=Gender, y=Attr_scores)) +
geom_jitter(position=position_jitter(0.2))+
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 1),
geom="pointrange", color="red")
ggsave("gender_attr_plot.png", gender_attr_plot, width = 1600, height = 900, units = "px")
我可以为每一对额外的变量复制粘贴代码并每次手动更改变量名称,但这似乎是一种非常低效的做事方式。此外,如果我需要 运行 对任何额外的变量对进行相同的分析,我将不得不再次复制整个代码才能做到这一点。
我想做的是创建一个 table 或带有变量对的嵌套列表(如果需要额外的变量对,以后可以轻松更新)并编写一个循环这些变量对并为每个变量执行所有 3 个操作(汇总统计、方差分析和绘图)。
我认为它应该看起来像这样(这与实际的工作代码相去甚远,它只是给出一个大概的想法):
variables <- list(
c(Gender, Attr_scores),
c(Gender, Fear_scores),
c(Type, Appr_scores),
c(Emotion, Avoid_scores))
for(i in variables){
library(dplyr)
group_by(df1, variables,'[[',1) %>%
summarise(
N = n(),
Mean = mean(variables,'[[',2, na.rm = TRUE),
Sd = sd(variables,'[[',2, na.rm = TRUE)
)
res.aov <- aov(variables,'[[',2 ~ variables,'[[',1, data = df1)
summary(res.aov)
plot <- ggplot(df1, aes(x=variables,'[[',1, y=variables,'[[',2)) +
geom_jitter(position=position_jitter(0.2))+
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 1),
geom="pointrange", color="red")
ggsave("??????.png", plot, width = 1600, height = 900, units = "px")
}
显然,这是行不通的,我一直在互联网上搜索解决方案,但我对 R 的了解还不足以弄清楚如何让它工作。任何帮助将不胜感激!
这可能有用
https://r4ds.had.co.nz/iteration.html#the-map-functions https://aosmith.rbind.io/2018/08/20/automating-exploratory-plots/
variables <-
structure(list(
x = c("Gender", "Gender", "Type", "Emotion"),
y = c("Attr_scores", "Fear_scores", "Appr_scores", "Avoid_scores")
),
class = "data.frame",
row.names = c(NA,-4L))
variables
#> x y
#> 1 Gender Attr_scores
#> 2 Gender Fear_scores
#> 3 Type Appr_scores
#> 4 Emotion Avoid_scores
library(tidyverse)
# GROUP
map2(
.x = variables$x,
.y = variables$y,
.f = ~ group_by(df,!!sym(.x)) %>%
summarise(
N = n(),
Mean = mean(!!sym(.y), na.rm = TRUE),
Sd = sd(!!sym(.y), na.rm = TRUE)
)) %>%
set_names(nm = str_c(variables$x, variables$y, sep = "#"))
#> $`Gender#Attr_scores`
#> # A tibble: 2 x 4
#> Gender N Mean Sd
#> <chr> <int> <dbl> <dbl>
#> 1 female 9 24 2.74
#> 2 male 9 15 2.74
#>
#> $`Gender#Fear_scores`
#> # A tibble: 2 x 4
#> Gender N Mean Sd
#> <chr> <int> <dbl> <dbl>
#> 1 female 9 3.67 1.80
#> 2 male 9 3.67 1.80
#>
#> $`Type#Appr_scores`
#> # A tibble: 3 x 4
#> Type N Mean Sd
#> <chr> <int> <dbl> <dbl>
#> 1 animal 6 28 0.894
#> 2 cartoon 6 25 0.894
#> 3 human 6 22 0.894
#>
#> $`Emotion#Avoid_scores`
#> # A tibble: 3 x 4
#> Emotion N Mean Sd
#> <chr> <int> <dbl> <dbl>
#> 1 happy 6 28.5 5.61
#> 2 neutral 6 30.5 5.61
#> 3 sad 6 29.5 5.61
# ANOVA
map2(
.x = variables$x,
.y = variables$y,
.f = ~ aov(as.formula(str_c(.y, .x, sep = "~")), data = df)
) %>%
set_names(nm = str_c(variables$x, variables$y, sep = "#"))
#> $`Gender#Attr_scores`
#> Call:
#> aov(formula = as.formula(str_c(.y, .x, sep = "~")), data = df)
#>
#> Terms:
#> Gender Residuals
#> Sum of Squares 364.5 120.0
#> Deg. of Freedom 1 16
#>
#> Residual standard error: 2.738613
#> Estimated effects may be unbalanced
#>
#> $`Gender#Fear_scores`
#> Call:
#> aov(formula = as.formula(str_c(.y, .x, sep = "~")), data = df)
#>
#> Terms:
#> Gender Residuals
#> Sum of Squares 0 52
#> Deg. of Freedom 1 16
#>
#> Residual standard error: 1.802776
#> Estimated effects may be unbalanced
#>
#> $`Type#Appr_scores`
#> Call:
#> aov(formula = as.formula(str_c(.y, .x, sep = "~")), data = df)
#>
#> Terms:
#> Type Residuals
#> Sum of Squares 108 12
#> Deg. of Freedom 2 15
#>
#> Residual standard error: 0.8944272
#> Estimated effects may be unbalanced
#>
#> $`Emotion#Avoid_scores`
#> Call:
#> aov(formula = as.formula(str_c(.y, .x, sep = "~")), data = df)
#>
#> Terms:
#> Emotion Residuals
#> Sum of Squares 12.0 472.5
#> Deg. of Freedom 2 15
#>
#> Residual standard error: 5.612486
#> Estimated effects may be unbalanced
#PLOT
f <- function(x, y) {
gender_attr_plot <- ggplot(df, aes(x = .data[[x]], y = .data[[y]])) +
geom_jitter(position = position_jitter(0.2)) +
stat_summary(
fun.data = mean_sdl,
fun.args = list(mult = 1),
geom = "pointrange",
color = "red"
)
}
all_plots <- map2(.x = variables$x, .y = variables$y, .f = f)
plotnames <- str_c(variables$x, "#", variables$y, ".png")
walk2(
.x = plotnames,
.y = all_plots,
.f = ~ ggsave(
filename = .x,
plot = .y,
width = 1600,
height = 900,
units = "px"
)
)
由 reprex package (v2.0.1)
于 2021-10-25 创建数据
Picture <- c(1:18)
Emotion <- rep(c('happy', 'sad', 'neutral'), times = 6)
Gender <- rep(c('male', 'female'), each = 9)
Type <-
rep(c('human', 'cartoon', 'animal', 'human', 'cartoon', 'animal'),
each = 3)
Trial <- rep(c('first'), times = 18)
Attr_scores <- c(11:28)
Fear_scores <- rep(c(3, 6, 2), times = 6)
Appr_scores <- rep(c(21:29), times = 2)
Avoid_scores <- c(21:38)
df <-
data.frame(
Picture,
Emotion,
Gender,
Type,
Trial,
Attr_scores,
Fear_scores,
Appr_scores,
Avoid_scores
)
以下是您的任务的可能解决方案:
我稍微修改了您的代码并创建了一个函数 my_function
,使用此函数您可以获得一对数据集的所需输出。结果是列表中的return!
library(dplyr)
library(ggplot2)
my_function <- function(df, x, y) {
# Summary
a <- group_by(df, {{x}}) %>%
summarise(
N = n(),
Mean = mean({{y}}, na.rm = TRUE),
Sd = sd({{y}}, na.rm = TRUE)
)
# ANOVA
res.aov <- aov({{y}} ~ {{x}}, data = df)
b <- summary(res.aov)
# Plot
c <- ggplot(df1, aes(x={{x}}, y={{y}})) +
geom_jitter(position=position_jitter(0.2))+
stat_summary(fun.data=mean_sdl, fun.args = list(mult = 1),
geom="pointrange", color="red")
ggsave(paste0(deparse(substitute(x)), "_",
deparse(substitute(y)), ".png"), width = 1600, height = 900, units = "px")
output<-list(a,b,c)
return(output)
}
# cases 1 - 4
my_function(df1, Gender, Attr_scores)
my_function(df1, Gender, Avoid_scores)
my_function(df1, Emotion, Attr_scores)
my_function(df1, Emotion, Fear_scores)