在 purrr 的 map() 函数中使用 levene_test?
use levene_test in map() function from purrr?
有没有办法通过 purrr 包中的 map() 函数进行 Levene 检验?或者是否有另一种简单的方法,可以对多个变量分别计算 Levene 检验?
我的数据框同时包含因子列和数值列,所以我尝试使用 map_if();这种做法对 Shapiro 检验等很有效。但是,我不知道在这里如何指定公式。我想按 Treatment 因子对所有数值变量进行检验。
library("tidyverse")
library("rstatix")
# Example data for the question: three factor columns (site, plot, Treatment)
# and three numeric measurement columns (flux1, flux2, flux3), ten rows each.
data <- data.frame(site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
.Label = c("S1 ", "S2 ", "S3 "), class = "factor"),
plot = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L),
.Label = c(" Tree 1 ", " Tree 2 ", " Tree 3 "), class = "factor"),
Treatment = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L), .Label = c("T1", "T2"), class = "factor"),
flux1 = c(11.52188065, 8.43156699, 4.495312274, -1.866676811, 3.861102035, -0.814742373, 6.51039536, 4.767950345, 10.36544542, 1.065963875),
flux2 = c(0.142259208, 0.04060245, 0.807631744, 0.060127596, -0.157762562, 0.062464942, 0.043147603, 0.495001652, 0.34363348, 0.134183704),
flux3 = c(0.147506197, 1.131009714, 0.038860728, 0.0176834, 0.053191593, 0.047591306, 0.00573377, -0.034926075, 0.123379247, 0.018882469))
# Attempt from the question (kept as asked; this is the call that does not
# work). `levene_test(. ~ Treatment)` is a call, not a function or `~` lambda,
# so levene_test() itself is invoked with the formula as its first argument
# instead of being applied to each numeric column.
map_if(data, is.numeric, levene_test(. ~ Treatment))
有什么建议吗?感谢您的帮助!
现在还有一个可重现的例子 ;)
问题在于 map 是逐列遍历的,传给函数的每一列不再是 data.frame,而 levene_test 需要 data.frame/tibble。根据 ?levene_test:
data - a data frame for evaluating the formula or a model
因此,不要直接使用 map_if,而是先用 select(where(is.numeric)) 选出数值列,用 names 取得列名,再用 map 遍历这些列名:每次用 select 只保留 'Treatment' 和当前遍历到的列,用 reformulate 创建公式,然后应用 levene_test。
library(rstatix)
library(dplyr)
library(purrr)
# levene_test() needs a data frame, so iterate over the *names* of the numeric
# columns and build a two-column frame (Treatment + current column) for each.
map_dfr(
  names(select(data, where(is.numeric))),
  function(col) {
    pair <- select(data, Treatment, all_of(col))
    # reformulate() builds the formula `<current column> ~ Treatment`.
    levene_test(pair, reformulate("Treatment", response = names(pair)[2]))
  }
)
-输出
# A tibble: 3 × 4
df1 df2 statistic p
<int> <int> <dbl> <dbl>
1 1 8 0.410 0.540
2 1 8 2.85 0.130
3 1 8 1.11 0.323
也可以使用 across 完成——即在 summarise 中用 across 遍历数值(numeric)列,用 cur_data() 取得数据,用 reformulate 创建公式,应用 levene_test,并把输出放在 list 中返回;然后 unclass,再用 bind_rows 合并(因为 unclass 会从 list 中去掉 data.frame 属性)。
# Run Levene's test for every numeric column against Treatment. Each result is
# wrapped in list() so that summarise() stores it as a list-column.
data %>%
  summarise(
    across(
      where(is.numeric),
      function(col) {
        column_formula <- reformulate("Treatment", response = cur_column())
        list(levene_test(cur_data(), column_formula))
      }
    )
  ) %>%
  # Drop the data.frame class and the column names, then stack the results.
  unclass() %>%
  unname() %>%
  bind_rows()
# A tibble: 3 × 4
df1 df2 statistic p
<int> <int> <dbl> <dbl>
1 1 8 0.410 0.540
2 1 8 2.85 0.130
3 1 8 1.11 0.323
如果需要 'flux' 标识列,请在 summarise 步骤中不要把输出包装在 list 中,然后使用带 .id 参数的 bind_rows。
# Same per-column Levene test, but without the list() wrapper: the column names
# are kept after unclass() and become the `flux` identifier in bind_rows().
data %>%
  summarise(
    across(
      where(is.numeric),
      function(col) {
        column_formula <- reformulate("Treatment", response = cur_column())
        levene_test(cur_data(), column_formula)
      }
    )
  ) %>%
  unclass() %>%
  bind_rows(.id = 'flux')
# A tibble: 3 × 5
flux df1 df2 statistic p
<chr> <int> <int> <dbl> <dbl>
1 flux1 1 8 0.410 0.540
2 flux2 1 8 2.85 0.130
3 flux3 1 8 1.11 0.323
另一种选择是直接沿用提问者(OP)的 map_if 写法:
# Keep map_if(), but hand levene_test() a real data frame: pair each numeric
# column, under the explicit name `value`, with the Treatment factor. Naming
# the column means the formula's response is found in `data` itself rather
# than through a bare `.` looked up outside the data frame.
# Non-numeric columns are mapped to NULL and contribute no rows to the result.
map_if(
  data,
  is.numeric,
  ~ levene_test(
    tibble(value = .x, Treatment = data$Treatment),
    value ~ Treatment
  ),
  .else = ~ NULL
) %>%
  bind_rows(.id = 'flux')
# A tibble: 3 × 5
flux df1 df2 statistic p
<chr> <int> <int> <dbl> <dbl>
1 flux1 1 8 0.410 0.540
2 flux2 1 8 2.85 0.130
3 flux3 1 8 1.11 0.323
这是另一种方案:先把数据转换为长格式,然后用 group_by 分组并应用公式(注意这里的 flux 列应当是因子!)
library(tidyr)
library(dplyr)
# Reshape to long format (one row per observation and flux variable), then run
# Levene's test once per flux variable by grouping on it.
data %>%
  pivot_longer(starts_with("flux"), names_to = "flux", values_to = "value") %>%
  mutate(flux = factor(flux)) %>%
  group_by(flux) %>%
  levene_test(value ~ Treatment)
flux df1 df2 statistic p
<fct> <int> <int> <dbl> <dbl>
1 flux1 1 8 0.410 0.540
2 flux2 1 8 2.85 0.130
3 flux3 1 8 1.11 0.323
你也可以直接使用 summarize,然后用 pivot_longer 转换并用 unnest 展开结果。
library(dplyr)
library(tidyr)
# Run Levene's test for every numeric column against Treatment, then reshape
# the one-row summary so that each test result becomes one row labelled by
# its column name.
data %>%
  summarize(
    across(
      where(is.numeric),
      # Name the response column explicitly via cur_column() instead of
      # relying on a bare `.` in the formula; list() makes a list-column.
      ~ list(levene_test(
        cur_data(),
        reformulate("Treatment", response = cur_column())
      ))
    )
  ) %>%
  pivot_longer(everything(), names_to = "flux", values_to = "levene_test") %>%
  unnest(levene_test)
另一种选择是把变量名传给 map,并据此创建公式。
library(purrr)
# Iterate over the names of the numeric columns; set_names() makes the vector
# self-named so that `.id` can label each result row with its column name.
map_dfr(
  set_names(names(Filter(is.numeric, data))),
  function(col) levene_test(data, as.formula(paste(col, "~ Treatment"))),
  .id = "flux"
)
结果(两种方法相同):
# A tibble: 3 x 5
flux df1 df2 statistic p
<chr> <int> <int> <dbl> <dbl>
1 flux1 1 8 0.410 0.540
2 flux2 1 8 2.85 0.130
3 flux3 1 8 1.11 0.323
有没有办法通过 purrr 包中的 map() 函数进行 Levene 检验?或者是否有另一种简单的方法,可以对多个变量分别计算 Levene 检验?
我的数据框同时包含因子列和数值列,所以我尝试使用 map_if();这种做法对 Shapiro 检验等很有效。但是,我不知道在这里如何指定公式。我想按 Treatment 因子对所有数值变量进行检验。
library("tidyverse")
library("rstatix")
# Example data for the question: three factor columns (site, plot, Treatment)
# and three numeric measurement columns (flux1, flux2, flux3), ten rows each.
data <- data.frame(site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
.Label = c("S1 ", "S2 ", "S3 "), class = "factor"),
plot = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L),
.Label = c(" Tree 1 ", " Tree 2 ", " Tree 3 "), class = "factor"),
Treatment = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L), .Label = c("T1", "T2"), class = "factor"),
flux1 = c(11.52188065, 8.43156699, 4.495312274, -1.866676811, 3.861102035, -0.814742373, 6.51039536, 4.767950345, 10.36544542, 1.065963875),
flux2 = c(0.142259208, 0.04060245, 0.807631744, 0.060127596, -0.157762562, 0.062464942, 0.043147603, 0.495001652, 0.34363348, 0.134183704),
flux3 = c(0.147506197, 1.131009714, 0.038860728, 0.0176834, 0.053191593, 0.047591306, 0.00573377, -0.034926075, 0.123379247, 0.018882469))
# Attempt from the question (kept as asked; this is the call that does not
# work). `levene_test(. ~ Treatment)` is a call, not a function or `~` lambda,
# so levene_test() itself is invoked with the formula as its first argument
# instead of being applied to each numeric column.
map_if(data, is.numeric, levene_test(. ~ Treatment))
有什么建议吗?感谢您的帮助!
现在还有一个可重现的例子 ;)
问题在于 map 是逐列遍历的,传给函数的每一列不再是 data.frame,而 levene_test 需要 data.frame/tibble。根据 ?levene_test:
data - a data frame for evaluating the formula or a model
因此,不要直接使用 map_if,而是先用 select(where(is.numeric)) 选出数值列,用 names 取得列名,再用 map 遍历这些列名:每次用 select 只保留 'Treatment' 和当前遍历到的列,用 reformulate 创建公式,然后应用 levene_test。
library(rstatix)
library(dplyr)
library(purrr)
# levene_test() needs a data frame, so iterate over the *names* of the numeric
# columns and build a two-column frame (Treatment + current column) for each.
map_dfr(
  names(select(data, where(is.numeric))),
  function(col) {
    pair <- select(data, Treatment, all_of(col))
    # reformulate() builds the formula `<current column> ~ Treatment`.
    levene_test(pair, reformulate("Treatment", response = names(pair)[2]))
  }
)
-输出
# A tibble: 3 × 4
df1 df2 statistic p
<int> <int> <dbl> <dbl>
1 1 8 0.410 0.540
2 1 8 2.85 0.130
3 1 8 1.11 0.323
也可以使用 across 完成——即在 summarise 中用 across 遍历数值(numeric)列,用 cur_data() 取得数据,用 reformulate 创建公式,应用 levene_test,并把输出放在 list 中返回;然后 unclass,再用 bind_rows 合并(因为 unclass 会从 list 中去掉 data.frame 属性)。
# Run Levene's test for every numeric column against Treatment. Each result is
# wrapped in list() so that summarise() stores it as a list-column.
data %>%
  summarise(
    across(
      where(is.numeric),
      function(col) {
        column_formula <- reformulate("Treatment", response = cur_column())
        list(levene_test(cur_data(), column_formula))
      }
    )
  ) %>%
  # Drop the data.frame class and the column names, then stack the results.
  unclass() %>%
  unname() %>%
  bind_rows()
# A tibble: 3 × 4
df1 df2 statistic p
<int> <int> <dbl> <dbl>
1 1 8 0.410 0.540
2 1 8 2.85 0.130
3 1 8 1.11 0.323
如果需要 'flux' 标识列,请在 summarise 步骤中不要把输出包装在 list 中,然后使用带 .id 参数的 bind_rows。
# Same per-column Levene test, but without the list() wrapper: the column names
# are kept after unclass() and become the `flux` identifier in bind_rows().
data %>%
  summarise(
    across(
      where(is.numeric),
      function(col) {
        column_formula <- reformulate("Treatment", response = cur_column())
        levene_test(cur_data(), column_formula)
      }
    )
  ) %>%
  unclass() %>%
  bind_rows(.id = 'flux')
# A tibble: 3 × 5
flux df1 df2 statistic p
<chr> <int> <int> <dbl> <dbl>
1 flux1 1 8 0.410 0.540
2 flux2 1 8 2.85 0.130
3 flux3 1 8 1.11 0.323
另一种选择是直接沿用提问者(OP)的 map_if 写法:
# Keep map_if(), but hand levene_test() a real data frame: pair each numeric
# column, under the explicit name `value`, with the Treatment factor. Naming
# the column means the formula's response is found in `data` itself rather
# than through a bare `.` looked up outside the data frame.
# Non-numeric columns are mapped to NULL and contribute no rows to the result.
map_if(
  data,
  is.numeric,
  ~ levene_test(
    tibble(value = .x, Treatment = data$Treatment),
    value ~ Treatment
  ),
  .else = ~ NULL
) %>%
  bind_rows(.id = 'flux')
# A tibble: 3 × 5
flux df1 df2 statistic p
<chr> <int> <int> <dbl> <dbl>
1 flux1 1 8 0.410 0.540
2 flux2 1 8 2.85 0.130
3 flux3 1 8 1.11 0.323
这是另一种方案:先把数据转换为长格式,然后用 group_by 分组并应用公式(注意这里的 flux 列应当是因子!)
library(tidyr)
library(dplyr)
# Reshape to long format (one row per observation and flux variable), then run
# Levene's test once per flux variable by grouping on it.
data %>%
  pivot_longer(starts_with("flux"), names_to = "flux", values_to = "value") %>%
  mutate(flux = factor(flux)) %>%
  group_by(flux) %>%
  levene_test(value ~ Treatment)
flux df1 df2 statistic p
<fct> <int> <int> <dbl> <dbl>
1 flux1 1 8 0.410 0.540
2 flux2 1 8 2.85 0.130
3 flux3 1 8 1.11 0.323
你也可以直接使用 summarize,然后用 pivot_longer 转换并用 unnest 展开结果。
library(dplyr)
library(tidyr)
# Run Levene's test for every numeric column against Treatment, then reshape
# the one-row summary so that each test result becomes one row labelled by
# its column name.
data %>%
  summarize(
    across(
      where(is.numeric),
      # Name the response column explicitly via cur_column() instead of
      # relying on a bare `.` in the formula; list() makes a list-column.
      ~ list(levene_test(
        cur_data(),
        reformulate("Treatment", response = cur_column())
      ))
    )
  ) %>%
  pivot_longer(everything(), names_to = "flux", values_to = "levene_test") %>%
  unnest(levene_test)
另一种选择是把变量名传给 map,并据此创建公式。
library(purrr)
# Iterate over the names of the numeric columns; set_names() makes the vector
# self-named so that `.id` can label each result row with its column name.
map_dfr(
  set_names(names(Filter(is.numeric, data))),
  function(col) levene_test(data, as.formula(paste(col, "~ Treatment"))),
  .id = "flux"
)
结果(两种方法相同):
# A tibble: 3 x 5
flux df1 df2 statistic p
<chr> <int> <int> <dbl> <dbl>
1 flux1 1 8 0.410 0.540
2 flux2 1 8 2.85 0.130
3 flux3 1 8 1.11 0.323