如何在 for 循环中选取列
How to select a column in a for loop
我有一个包含 310 个不同列的大数据框,在这里我创建了一个类似的小示例。
我想用 for 循环选取名称以 "sp" 开头的那些列。
# Small example data frame (10 rows, 13 columns) mirroring the layout of the
# real 310-column data set: ID, COD, SP, four lower-case `sp*` columns and six
# `*_bio_*` columns.
ex <- data.frame(
  ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  COD = c(1, 8, 4, 5, 6, 8, 7, 2, 8, 10),
  SP = c(10, 20, 40, 50, 60, 70, 70, 100, 50, 40),
  sp010_hd = c(100, 200, 350, 500, 500, 250, 240, 680, 700, 300),
  sp300_he = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_hg = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_cc = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  hd_bio_4 = c(
    208.92, 248.10, 151.95, 130.01, 90.01,
    140.01, 150.09, 400.25, 300.00, 100.50
  ),
  hd_bio_6 = c(
    207.92, 208.10, 161.95, 170.01, 190.01,
    120.01, 155.09, 200.25, 100.00, 100.50
  ),
  hd_bio_7 = c(
    227.92, 218.10, 161.95, 170.01, 150.01,
    150.71, 160.09, 220.25, 120.00, 108.50
  ),
  he_bio_4 = c(
    208.92, 248.10, 151.95, 140.01, 60.01,
    160.01, 157.09, 420.25, 300.00, 100.50
  ),
  he_bio_6 = c(
    257.92, 238.10, 131.95, 130.01, 160.01,
    125.01, 155.09, 220.25, 100.00, 100.50
  ),
  he_bio_7 = c(
    227.92, 218.10, 161.95, 130.01, 150.01,
    100.71, 165.09, 220.25, 120.00, 108.50
  )
)
我想对其中每一列应用 lm() 函数。我做过一些尝试,但都没有成功。
谁能帮我?
谢谢
您可以使用 purrr 的 map 系列函数(此处为 map_dfr):
library(tidyverse)
# Predictor names: every column of `ex` except ID, COD and SP.
ivs <- ex %>%
  select(-ID, -COD, -SP) %>%
  colnames()
# Name the vector by itself so `.id = "iv"` labels each model's rows.
ivs <- setNames(ivs, ivs)

# One regression of SP on each predictor, row-bound into a single result.
# The argument is deliberately called `.x` so the reported term label
# (`ex[, .x]`) stays the same as with the formula shorthand.
purrr::map_dfr(
  ivs,
  function(.x) broom::tidy(lm(ex$SP ~ ex[, .x])),
  .id = "iv"
)
注意:如果让 term 列显示实际使用的变量名,输出会更易读。为此,请在调用 lm() 时使用 data 参数:
# Same per-column models, fitted through `data =` on a two-column subset so
# that the `term` column reports the predictor's own column name.
map_dfr(
  ivs,
  function(iv) {
    fit <- lm(SP ~ ., data = ex[, c(iv, "SP")])
    broom::tidy(fit)
  },
  .id = "iv"
)
输出:
# A tibble: 20 x 6
iv term estimate std.error statistic p.value
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 sp010_hd (Intercept) 22.0 15.5 1.42 0.192
2 sp010_hd sp010_hd 0.0758 0.0362 2.10 0.0694
3 sp300_he (Intercept) 42.6 22.1 1.93 0.0896
4 sp300_he sp300_he 0.0199 0.0479 0.415 0.689
5 sp330_hg (Intercept) 42.6 22.1 1.93 0.0896
6 sp330_hg sp330_hg 0.0199 0.0479 0.415 0.689
7 sp330_cc (Intercept) 42.6 22.1 1.93 0.0896
8 sp330_cc sp330_cc 0.0199 0.0479 0.415 0.689
9 hd_bio_4 (Intercept) 36.7 19.1 1.92 0.0913
10 hd_bio_4 hd_bio_4 0.0742 0.0897 0.828 0.432
11 hd_bio_6 (Intercept) 65.0 35.9 1.81 0.108
12 hd_bio_6 hd_bio_6 -0.0866 0.216 -0.401 0.699
13 hd_bio_7 (Intercept) 65.5 38.2 1.72 0.124
14 hd_bio_7 hd_bio_7 -0.0860 0.220 -0.390 0.707
15 he_bio_4 (Intercept) 36.0 18.2 1.98 0.0833
16 he_bio_4 he_bio_4 0.0772 0.0831 0.929 0.380
17 he_bio_6 (Intercept) 68.6 26.7 2.57 0.0332
18 he_bio_6 he_bio_6 -0.109 0.156 -0.695 0.507
19 he_bio_7 (Intercept) 67.8 31.6 2.15 0.0640
20 he_bio_7 he_bio_7 -0.105 0.190 -0.553 0.596
使用 dplyr 包的 starts_with() 函数,可以轻松地只选出带有 "sp" 前缀的列(注意:starts_with() 默认不区分大小写,"sp" 也会匹配 SP 列;如需排除它,请设置 ignore.case = FALSE):
# Reduced example: ID, COD, SP and the four lower-case `sp*` columns.
ex <- data.frame(
  ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  COD = c(1, 8, 4, 5, 6, 8, 7, 2, 8, 10),
  SP = c(10, 20, 40, 50, 60, 70, 70, 100, 50, 40),
  sp010_hd = c(100, 200, 350, 500, 500, 250, 240, 680, 700, 300),
  sp300_he = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_hg = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_cc = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300)
)

library(dplyr)

# starts_with() ignores case by default, so "sp" would also match the
# upper-case `SP` column; ignore.case = FALSE keeps only the `sp...` columns.
sp_only <- select(ex, starts_with("sp", ignore.case = FALSE))
sp_only
如果想进一步对每一列分别做回归,可以使用 lapply() 或 purrr::map(),如下所示:
# Reshape the lower-case `sp*` columns to long format (column name in
# `sp_number`, value in `whatever_units`), then fit one linear model per
# original column. pivot_longer() replaces the superseded gather();
# ignore.case = FALSE keeps the upper-case `SP` column out of the reshape so
# it remains available as the model's predictor.
ex %>%
  pivot_longer(
    cols = starts_with("sp", ignore.case = FALSE),
    names_to = "sp_number",
    values_to = "whatever_units"
  ) %>%
  group_by(sp_number) %>%
  nest() %>%
  # Drop the grouping once each column's data has been nested.
  ungroup() %>%
  mutate(
    lm_results = map(data, ~ lm(whatever_units ~ SP, data = .x)),
    tidy_results = map(lm_results, broom::tidy)
  ) %>%
  unnest(tidy_results)
我有一个包含 310 个不同列的大数据框,在这里我创建了一个类似的小示例。我想用 for 循环选取名称以 "sp" 开头的那些列。
# Small example data frame (10 rows, 13 columns) mirroring the layout of the
# real 310-column data set: ID, COD, SP, four lower-case `sp*` columns and six
# `*_bio_*` columns.
ex <- data.frame(
  ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  COD = c(1, 8, 4, 5, 6, 8, 7, 2, 8, 10),
  SP = c(10, 20, 40, 50, 60, 70, 70, 100, 50, 40),
  sp010_hd = c(100, 200, 350, 500, 500, 250, 240, 680, 700, 300),
  sp300_he = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_hg = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_cc = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  hd_bio_4 = c(
    208.92, 248.10, 151.95, 130.01, 90.01,
    140.01, 150.09, 400.25, 300.00, 100.50
  ),
  hd_bio_6 = c(
    207.92, 208.10, 161.95, 170.01, 190.01,
    120.01, 155.09, 200.25, 100.00, 100.50
  ),
  hd_bio_7 = c(
    227.92, 218.10, 161.95, 170.01, 150.01,
    150.71, 160.09, 220.25, 120.00, 108.50
  ),
  he_bio_4 = c(
    208.92, 248.10, 151.95, 140.01, 60.01,
    160.01, 157.09, 420.25, 300.00, 100.50
  ),
  he_bio_6 = c(
    257.92, 238.10, 131.95, 130.01, 160.01,
    125.01, 155.09, 220.25, 100.00, 100.50
  ),
  he_bio_7 = c(
    227.92, 218.10, 161.95, 130.01, 150.01,
    100.71, 165.09, 220.25, 120.00, 108.50
  )
)
我想对其中每一列应用 lm() 函数。我做过一些尝试,但都没有成功。谁能帮我?谢谢!
您可以使用 purrr 的 map 系列函数(此处为 map_dfr):
library(tidyverse)
# Predictor names: every column of `ex` except ID, COD and SP.
ivs <- ex %>%
  select(-ID, -COD, -SP) %>%
  colnames()
# Name the vector by itself so `.id = "iv"` labels each model's rows.
ivs <- setNames(ivs, ivs)

# One regression of SP on each predictor, row-bound into a single result.
# The argument is deliberately called `.x` so the reported term label
# (`ex[, .x]`) stays the same as with the formula shorthand.
purrr::map_dfr(
  ivs,
  function(.x) broom::tidy(lm(ex$SP ~ ex[, .x])),
  .id = "iv"
)
注意:如果让 term 列显示实际使用的变量名,输出会更易读。为此,请在调用 lm() 时使用 data 参数:
# Same per-column models, fitted through `data =` on a two-column subset so
# that the `term` column reports the predictor's own column name.
map_dfr(
  ivs,
  function(iv) {
    fit <- lm(SP ~ ., data = ex[, c(iv, "SP")])
    broom::tidy(fit)
  },
  .id = "iv"
)
输出:
# A tibble: 20 x 6
iv term estimate std.error statistic p.value
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 sp010_hd (Intercept) 22.0 15.5 1.42 0.192
2 sp010_hd sp010_hd 0.0758 0.0362 2.10 0.0694
3 sp300_he (Intercept) 42.6 22.1 1.93 0.0896
4 sp300_he sp300_he 0.0199 0.0479 0.415 0.689
5 sp330_hg (Intercept) 42.6 22.1 1.93 0.0896
6 sp330_hg sp330_hg 0.0199 0.0479 0.415 0.689
7 sp330_cc (Intercept) 42.6 22.1 1.93 0.0896
8 sp330_cc sp330_cc 0.0199 0.0479 0.415 0.689
9 hd_bio_4 (Intercept) 36.7 19.1 1.92 0.0913
10 hd_bio_4 hd_bio_4 0.0742 0.0897 0.828 0.432
11 hd_bio_6 (Intercept) 65.0 35.9 1.81 0.108
12 hd_bio_6 hd_bio_6 -0.0866 0.216 -0.401 0.699
13 hd_bio_7 (Intercept) 65.5 38.2 1.72 0.124
14 hd_bio_7 hd_bio_7 -0.0860 0.220 -0.390 0.707
15 he_bio_4 (Intercept) 36.0 18.2 1.98 0.0833
16 he_bio_4 he_bio_4 0.0772 0.0831 0.929 0.380
17 he_bio_6 (Intercept) 68.6 26.7 2.57 0.0332
18 he_bio_6 he_bio_6 -0.109 0.156 -0.695 0.507
19 he_bio_7 (Intercept) 67.8 31.6 2.15 0.0640
20 he_bio_7 he_bio_7 -0.105 0.190 -0.553 0.596
使用 dplyr 包的 starts_with() 函数,可以轻松地只选出带有 "sp" 前缀的列(注意:starts_with() 默认不区分大小写,"sp" 也会匹配 SP 列;如需排除它,请设置 ignore.case = FALSE):
# Reduced example: ID, COD, SP and the four lower-case `sp*` columns.
ex <- data.frame(
  ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  COD = c(1, 8, 4, 5, 6, 8, 7, 2, 8, 10),
  SP = c(10, 20, 40, 50, 60, 70, 70, 100, 50, 40),
  sp010_hd = c(100, 200, 350, 500, 500, 250, 240, 680, 700, 300),
  sp300_he = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_hg = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300),
  sp330_cc = c(100, 500, 650, 500, 500, 250, 260, 480, 700, 300)
)

library(dplyr)

# starts_with() ignores case by default, so "sp" would also match the
# upper-case `SP` column; ignore.case = FALSE keeps only the `sp...` columns.
sp_only <- select(ex, starts_with("sp", ignore.case = FALSE))
sp_only
如果想进一步对每一列分别做回归,可以使用 lapply() 或 purrr::map(),如下所示:
# Reshape the lower-case `sp*` columns to long format (column name in
# `sp_number`, value in `whatever_units`), then fit one linear model per
# original column. pivot_longer() replaces the superseded gather();
# ignore.case = FALSE keeps the upper-case `SP` column out of the reshape so
# it remains available as the model's predictor.
ex %>%
  pivot_longer(
    cols = starts_with("sp", ignore.case = FALSE),
    names_to = "sp_number",
    values_to = "whatever_units"
  ) %>%
  group_by(sp_number) %>%
  nest() %>%
  # Drop the grouping once each column's data has been nested.
  ungroup() %>%
  mutate(
    lm_results = map(data, ~ lm(whatever_units ~ SP, data = .x)),
    tidy_results = map(lm_results, broom::tidy)
  ) %>%
  unnest(tidy_results)