在 for 循环中使用 split()
Using split() in a for loop
我想写一个循环,对每个变量按其因子水平拆分数据,并分别得到各组的摘要(summary)。例如,如果要按 grouping 变量的因子水平拆分并得到摘要,我会这样写:
# Keep the three columns, split the rows by the values of `grouping`,
# and apply summary() to each piece.
# (`%>%`, select() and map() are not base R -- presumably dplyr and purrr
# are attached; confirm.)
df %>%
select(grouping, length, weight) %>%
split(.$grouping) %>%
map(summary)
但是,我不确定如何将其放入循环中,以便根据数据框中每个感兴趣变量的因子水平获得摘要。
例如,我可以用下面的方法得到数据框第 3 列和第 4 列变量的 summary():
# Dummy data: 10 rows. No seed is set in this snippet, so the sampled
# values are not reproducible between runs.
length = sample(30:60, 10, replace = FALSE)
weight = sample(50:70, 10, replace = FALSE)
grouping = c("A", "A", "B", "A", "B", "A", "B", "B", "B", "A")
colour = c("Blue", "Green", "Green", "Green", "Blue", "Blue", "Blue", "Green", "Blue", "Green")
type = c("Case", "Control", "Case", "Case", "Case", "Control", "Control", "Case", "Control", "Case")
df = data.frame(length, weight, grouping, colour, type)
# Variables to loop: the names of columns 3 and 4 of df (grouping, colour)
colNames <- names(df)[c(3:4)]
# Summary: loop over the column names; `i` is a name (a string), not a position
for(i in colNames){
# Summarise the selected columns and keep the result in a variable that is
# also called `summary`.
# NOTE(review): df has no column named `colNames`, so `.$colNames[i]` looks
# like it evaluates to NULL instead of naming the column held in `i` -- confirm.
summary <- df %>%
select(length, weight, .$colNames[i]) %>%
summary()
# Printed explicitly inside the loop
print(summary)
}
但是当按每个变量的因子水平拆分时我做不到:
# Variables to loop: the names of columns 3 and 4 of df
colNames = names(df)[c(3,4)]
# Summary: attempt to split by each of those columns in turn
for(i in colNames){
# NOTE(review): `i` is already a column name and df has no column named
# `colNames`, so `.$colNames[i]` looks like it evaluates to NULL in both
# select() and split() -- confirm.
# summary() receives the whole result of split() (not each piece), and its
# result is neither stored nor printed here.
df %>%
select(length, weight, .$colNames[i]) %>%
split(.$colNames[i]) %>%
summary()
}
我认为问题出在 split(.$colNames[i]) 上,但我不确定该如何解决。感谢您的帮助!
您的代码有两个问题:
- `i` 本身已经是列名,因此 `.$colNames[i]` 的结果是 `NULL`。这个问题在 `select()` 那一步就已经出现了。
- 如果想通过保存了列名的变量来访问数据框中的列,`$` 不起作用,应改用 `[[`。
# Packages: dplyr supplies select() and the %>% pipe, purrr supplies
# map() and walk().
library(dplyr)
library(purrr)

# Dummy data. The seed is fixed so that sample() returns the same values
# on every run.
set.seed(42)
length <- sample(30:60, 10, replace = FALSE)
weight <- sample(50:70, 10, replace = FALSE)
grouping <- c("A", "A", "B", "A", "B", "A", "B", "B", "B", "A")
colour <- c("Blue", "Green", "Green", "Green", "Blue", "Blue", "Blue", "Green", "Blue", "Green")
type <- c("Case", "Control", "Case", "Case", "Case", "Control", "Control", "Case", "Control", "Case")
df <- data.frame(length, weight, grouping, colour, type)

# Variables to loop: the names of columns 3 and 4 (grouping, colour).
colNames <- names(df)[3:4]

# Summary: for each grouping column, split the data by that column's values
# and print a summary() of every piece.
for (i in colNames) {
  df %>%
    # `i` already holds the column name; all_of() selects the column it names.
    select(length, weight, all_of(i)) %>%
    # `[[` looks the column up by the string stored in `i`; `$` would look
    # for a column literally called "i".
    split(.[[i]]) %>%
    map(summary) %>%
    # Printing is a side effect, so walk() is used rather than map(), which
    # would build a list of results only to discard it.
    walk(print)
}
#> length weight grouping
#> Min. :33.0 Min. :52.0 Length:5
#> 1st Qu.:34.0 1st Qu.:53.0 Class :character
#> Median :36.0 Median :54.0 Mode :character
#> Mean :40.6 Mean :59.2
#> 3rd Qu.:46.0 3rd Qu.:67.0
#> Max. :54.0 Max. :70.0
#> length weight grouping
#> Min. :30 Min. :58.0 Length:5
#> 1st Qu.:39 1st Qu.:60.0 Class :character
#> Median :44 Median :63.0 Mode :character
#> Mean :44 Mean :62.2
#> 3rd Qu.:47 3rd Qu.:64.0
#> Max. :60 Max. :66.0
#> NULL
#> length weight colour
#> Min. :33.0 Min. :52.0 Length:5
#> 1st Qu.:39.0 1st Qu.:53.0 Class :character
#> Median :44.0 Median :58.0 Mode :character
#> Mean :41.8 Mean :57.4
#> 3rd Qu.:46.0 3rd Qu.:60.0
#> Max. :47.0 Max. :64.0
#> length weight colour
#> Min. :30.0 Min. :54 Length:5
#> 1st Qu.:34.0 1st Qu.:63 Class :character
#> Median :36.0 Median :66 Mode :character
#> Mean :42.8 Mean :64
#> 3rd Qu.:54.0 3rd Qu.:67
#> Max. :60.0 Max. :70
我想写一个循环,对每个变量按其因子水平拆分数据,并分别得到各组的摘要(summary)。例如,如果要按 grouping 变量的因子水平拆分并得到摘要,我会这样写:
# Keep the three columns, split the rows by the values of `grouping`,
# and apply summary() to each piece.
# (`%>%`, select() and map() are not base R -- presumably dplyr and purrr
# are attached; confirm.)
df %>%
select(grouping, length, weight) %>%
split(.$grouping) %>%
map(summary)
但是,我不确定如何将其放入循环中,以便根据数据框中每个感兴趣变量的因子水平获得摘要。
例如,我可以用下面的方法得到数据框第 3 列和第 4 列变量的 summary():
# Dummy data: 10 rows. No seed is set in this snippet, so the sampled
# values are not reproducible between runs.
length = sample(30:60, 10, replace = FALSE)
weight = sample(50:70, 10, replace = FALSE)
grouping = c("A", "A", "B", "A", "B", "A", "B", "B", "B", "A")
colour = c("Blue", "Green", "Green", "Green", "Blue", "Blue", "Blue", "Green", "Blue", "Green")
type = c("Case", "Control", "Case", "Case", "Case", "Control", "Control", "Case", "Control", "Case")
df = data.frame(length, weight, grouping, colour, type)
# Variables to loop: the names of columns 3 and 4 of df (grouping, colour)
colNames <- names(df)[c(3:4)]
# Summary: loop over the column names; `i` is a name (a string), not a position
for(i in colNames){
# Summarise the selected columns and keep the result in a variable that is
# also called `summary`.
# NOTE(review): df has no column named `colNames`, so `.$colNames[i]` looks
# like it evaluates to NULL instead of naming the column held in `i` -- confirm.
summary <- df %>%
select(length, weight, .$colNames[i]) %>%
summary()
# Printed explicitly inside the loop
print(summary)
}
但是当按每个变量的因子水平拆分时我做不到:
# Variables to loop: the names of columns 3 and 4 of df
colNames = names(df)[c(3,4)]
# Summary: attempt to split by each of those columns in turn
for(i in colNames){
# NOTE(review): `i` is already a column name and df has no column named
# `colNames`, so `.$colNames[i]` looks like it evaluates to NULL in both
# select() and split() -- confirm.
# summary() receives the whole result of split() (not each piece), and its
# result is neither stored nor printed here.
df %>%
select(length, weight, .$colNames[i]) %>%
split(.$colNames[i]) %>%
summary()
}
我认为问题出在 split(.$colNames[i]) 上,但我不确定该如何解决。感谢您的帮助!
您的代码有两个问题:
- `i` 本身已经是列名,因此 `.$colNames[i]` 的结果是 `NULL`。这个问题在 `select()` 那一步就已经出现了。
- 如果想通过保存了列名的变量来访问数据框中的列,`$` 不起作用,应改用 `[[`。
# Packages: dplyr supplies select() and the %>% pipe, purrr supplies
# map() and walk().
library(dplyr)
library(purrr)

# Dummy data. The seed is fixed so that sample() returns the same values
# on every run.
set.seed(42)
length <- sample(30:60, 10, replace = FALSE)
weight <- sample(50:70, 10, replace = FALSE)
grouping <- c("A", "A", "B", "A", "B", "A", "B", "B", "B", "A")
colour <- c("Blue", "Green", "Green", "Green", "Blue", "Blue", "Blue", "Green", "Blue", "Green")
type <- c("Case", "Control", "Case", "Case", "Case", "Control", "Control", "Case", "Control", "Case")
df <- data.frame(length, weight, grouping, colour, type)

# Variables to loop: the names of columns 3 and 4 (grouping, colour).
colNames <- names(df)[3:4]

# Summary: for each grouping column, split the data by that column's values
# and print a summary() of every piece.
for (i in colNames) {
  df %>%
    # `i` already holds the column name; all_of() selects the column it names.
    select(length, weight, all_of(i)) %>%
    # `[[` looks the column up by the string stored in `i`; `$` would look
    # for a column literally called "i".
    split(.[[i]]) %>%
    map(summary) %>%
    # Printing is a side effect, so walk() is used rather than map(), which
    # would build a list of results only to discard it.
    walk(print)
}
#> length weight grouping
#> Min. :33.0 Min. :52.0 Length:5
#> 1st Qu.:34.0 1st Qu.:53.0 Class :character
#> Median :36.0 Median :54.0 Mode :character
#> Mean :40.6 Mean :59.2
#> 3rd Qu.:46.0 3rd Qu.:67.0
#> Max. :54.0 Max. :70.0
#> length weight grouping
#> Min. :30 Min. :58.0 Length:5
#> 1st Qu.:39 1st Qu.:60.0 Class :character
#> Median :44 Median :63.0 Mode :character
#> Mean :44 Mean :62.2
#> 3rd Qu.:47 3rd Qu.:64.0
#> Max. :60 Max. :66.0
#> NULL
#> length weight colour
#> Min. :33.0 Min. :52.0 Length:5
#> 1st Qu.:39.0 1st Qu.:53.0 Class :character
#> Median :44.0 Median :58.0 Mode :character
#> Mean :41.8 Mean :57.4
#> 3rd Qu.:46.0 3rd Qu.:60.0
#> Max. :47.0 Max. :64.0
#> length weight colour
#> Min. :30.0 Min. :54 Length:5
#> 1st Qu.:34.0 1st Qu.:63 Class :character
#> Median :36.0 Median :66 Mode :character
#> Mean :42.8 Mean :64
#> 3rd Qu.:54.0 3rd Qu.:67
#> Max. :60.0 Max. :70