如何在 R 中根据 for 循环的结果创建多个数据框?

How do I create multiple dataframes from a result in a for loop in R?

我有 11 个数据框,其中包含切萨皮克海草调查的各种观察结果。每个数据框都包含以下变量(包括示例值)。有 11 个数据框,因为每个数据框代表来自单个 SAMPYR 的观察结果。所以:

    > head(density.2007)
       PLOT SIZE DENSITY SEEDYR SAMPYR AGE SHOOTS
    1  HI 2  1.0      50   2006   2007   1    6.0
    2  HI 5  0.5     100   2006   2007   1   11.6
    3  HI 7  0.5      50   2006   2007   1    6.0
    4  HI 9  0.5     100   2006   2007   1    9.6
    5 HI 10  1.0     100   2006   2007   1   30.0
    6 HI 23  1.0      50   2006   2007   1   40.4
                                               
 > head(density.2008)
   PLOT SIZE DENSITY SEEDYR SAMPYR AGE SHOOTS NOTES id
29 HI 1  1.0     100   2007   2008   1   39.6       29
30 HI 2  1.0      50   2006   2008   2   54.8       30
31 HI 3  0.5     100   2007   2008   1   11.2       31
32 HI 4  1.0     100   2007   2008   1    8.8       32
33 HI 5  0.5     100   2006   2008   2   24.0       33
34 HI 7  0.5      50   2006   2008   2    0.0       34

我想编写一个 for 循环,从每个数据框的 PLOT 列中取出所有唯一值,并计算每个值出现的次数(这样我就可以进行过滤,只列出出现多次的值)。

我目前拥有的是:

# Names of the eleven yearly data frames: density.2007 ... density.2017.
density.names <- paste0("density.", 2007:2017)

# Print the number of rows per PLOT value for each yearly data frame.
# seq_along() replaces 1:length(): for an empty vector 1:length(x) yields
# c(1, 0) and the loop body would run with invalid indices.
for (i in seq_along(density.names)) {
  # get() looks up the data frame whose name is stored in the vector.
  get(density.names[i]) %>%
    count(PLOT) %>%
    print()
}

这段代码输出

+     print()
      PLOT n
1     HI 1 1
2    HI 10 1
3   HI 100 1
4   HI 103 1
5   HI 104 1
6    HI 11 1
7    HI 13 1
8    HI 14 1
9    HI 15 1
10   HI 17 1
11   HI 18 1
12    HI 2 1
13   HI 20 1
14   HI 21 1
15   HI 23 1
16   HI 25 1
17   HI 27 1
18   HI 29 1
19    HI 3 1
20   HI 31 1
21   HI 32 1
22   HI 36 1
23   HI 37 1
24   HI 38 1
25   HI 39 1
26    HI 4 1
27   HI 40 1

但是除了打印出来之外,我无法对这些结果做进一步处理。有没有办法过滤行,以便只显示 n=2 的行?或者让 for 循环生成 11 个数据框,这样我至少能在全局环境中拥有它们的副本,以便进一步操作?

谢谢!如果有帮助,我可以提供更多详细信息。

不要用循环来做!!应该采用完全不同的做法,我会一步一步地说明。第一步,我先准备一个函数,用来生成与您的数据类似的示例数据。

library(tidyverse)

#' Generate a synthetic survey data frame for one sampling year
#'
#' @param year Sampling year; stored in `SAMPYR`, with `year - 1` stored in
#'   `SEEDYR`.
#' @param n Number of rows to generate.
#' @return A tibble with `n` rows and the columns PLOT, SIZE, DENSITY,
#'   SEEDYR, SAMPYR, AGE and SHOOTS.
dens <- function(year, n) {
  tibble(
    # Plot labels "HI <k>" are drawn with replacement from 1:(n / 7), so the
    # same label occurs several times; `:` stops at the last whole number
    # that does not exceed n / 7.
    PLOT = paste("HI", sample(1:(n / 7), n, replace = TRUE)),
    SIZE = runif(n, 0.1, 3),
    DENSITY = sample(seq(50, 200, by = 50), n, replace = TRUE),
    # Length-one values are recycled by tibble() to all n rows.
    SEEDYR = year - 1,
    SAMPYR = year,
    AGE = sample(1:5, n, replace = TRUE),
    SHOOTS = runif(n, 0.1, 3)
  )
}

让我们看看它是如何工作的并生成一些示例数据帧

# Fix the RNG seed so the simulated data frames are reproducible.
set.seed(123)

# One simulated data frame per sampling year, each with its own row count.
density.2007 <- dens(year = 2007, n = 120)
density.2008 <- dens(year = 2008, n = 88)
density.2009 <- dens(year = 2009, n = 135)
density.2010 <- dens(year = 2010, n = 156)

density.2007 数据框看起来像这样

# A tibble: 120 x 7
   PLOT   SIZE DENSITY SEEDYR SAMPYR   AGE SHOOTS
   <chr> <dbl>   <dbl>  <dbl>  <dbl> <int>  <dbl>
 1 HI 15 1.67      200   2006   2007     4  1.80 
 2 HI 14 0.270     150   2006   2007     2  2.44 
 3 HI 3  0.856      50   2006   2007     3  0.686
 4 HI 10 1.25      200   2006   2007     5  1.43 
 5 HI 11 0.673      50   2006   2007     5  1.40 
 6 HI 5  2.51      150   2006   2007     3  2.23 
 7 HI 14 0.543     150   2006   2007     2  2.17 
 8 HI 5  2.43      200   2006   2007     5  2.51 
 9 HI 9  1.69      100   2006   2007     4  2.67 
10 HI 3  2.02       50   2006   2007     2  2.86 
# ... with 110 more rows

现在需要将它们合并为一个数据框

# Stack the yearly data frames row-wise into one combined data frame.
df <- bind_rows(
  density.2007,
  density.2008,
  density.2009,
  density.2010
)

输出

# A tibble: 499 x 7
   PLOT   SIZE DENSITY SEEDYR SAMPYR   AGE SHOOTS
   <chr> <dbl>   <dbl>  <dbl>  <dbl> <int>  <dbl>
 1 HI 15 1.67      200   2006   2007     4  1.80 
 2 HI 14 0.270     150   2006   2007     2  2.44 
 3 HI 3  0.856      50   2006   2007     3  0.686
 4 HI 10 1.25      200   2006   2007     5  1.43 
 5 HI 11 0.673      50   2006   2007     5  1.40 
 6 HI 5  2.51      150   2006   2007     3  2.23 
 7 HI 14 0.543     150   2006   2007     2  2.17 
 8 HI 5  2.43      200   2006   2007     5  2.51 
 9 HI 9  1.69      100   2006   2007     4  2.67 
10 HI 3  2.02       50   2006   2007     2  2.86 
# ... with 489 more rows

下一步,统计PLOT变量的每个值出现了多少次

# Number of rows per PLOT value (stored in PLOT.n), sorted from the least
# to the most frequent plot.
PLOT.count <- count(df, PLOT, name = "PLOT.n") %>%
  arrange(PLOT.n)

输出

# A tibble: 22 x 2
   PLOT  PLOT.n
   <chr>  <int>
 1 HI 20      3
 2 HI 22      5
 3 HI 21      7
 4 HI 18     12
 5 HI 2      19
 6 HI 1      20
 7 HI 15     20
 8 HI 17     21
 9 HI 6      22
10 HI 11     23
# ... with 12 more rows

在倒数第二步,让我们将这些计数附加到原始数据框

# Attach each plot's overall row count (PLOT.n) to every row of df.
df <- left_join(df, PLOT.count, by = "PLOT")

输出

# A tibble: 499 x 8
   PLOT   SIZE DENSITY SEEDYR SAMPYR   AGE SHOOTS PLOT.n
   <chr> <dbl>   <dbl>  <dbl>  <dbl> <int>  <dbl>  <int>
 1 HI 15 1.67      200   2006   2007     4  1.80      20
 2 HI 14 0.270     150   2006   2007     2  2.44      32
 3 HI 3  0.856      50   2006   2007     3  0.686     27
 4 HI 10 1.25      200   2006   2007     5  1.43      25
 5 HI 11 0.673      50   2006   2007     5  1.40      23
 6 HI 5  2.51      150   2006   2007     3  2.23      38
 7 HI 14 0.543     150   2006   2007     2  2.17      32
 8 HI 5  2.43      200   2006   2007     5  2.51      38
 9 HI 9  1.69      100   2006   2007     4  2.67      26
10 HI 3  2.02       50   2006   2007     2  2.86      27
# ... with 489 more rows

现在随意过滤

# Keep only the rows whose plot occurs more than 30 times overall.
filter(df, PLOT.n > 30)

输出

# A tibble: 139 x 8
   PLOT   SIZE DENSITY SEEDYR SAMPYR   AGE SHOOTS PLOT.n
   <chr> <dbl>   <dbl>  <dbl>  <dbl> <int>  <dbl>  <int>
 1 HI 14 0.270     150   2006   2007     2  2.44      32
 2 HI 5  2.51      150   2006   2007     3  2.23      38
 3 HI 14 0.543     150   2006   2007     2  2.17      32
 4 HI 5  2.43      200   2006   2007     5  2.51      38
 5 HI 8  0.598      50   2006   2007     1  1.70      34
 6 HI 7  1.94       50   2006   2007     4  1.61      35
 7 HI 14 2.91       50   2006   2007     4  0.215     32
 8 HI 7  0.846     150   2006   2007     4  0.506     35
 9 HI 7  2.38      150   2006   2007     3  1.34      35
10 HI 7  2.62      100   2006   2007     3  0.167     35
# ... with 129 more rows

或者这样

# Rows whose PLOT.n equals the minimum, the median, and the maximum of the
# PLOT.n column of df, respectively.
filter(df, PLOT.n == min(PLOT.n))
filter(df, PLOT.n == median(PLOT.n))
filter(df, PLOT.n == max(PLOT.n))

输出

> df %>% filter(PLOT.n == min(PLOT.n))
# A tibble: 3 x 8
  PLOT   SIZE DENSITY SEEDYR SAMPYR   AGE SHOOTS PLOT.n
  <chr> <dbl>   <dbl>  <dbl>  <dbl> <int>  <dbl>  <int>
1 HI 20 0.392     200   2009   2010     1  0.512      3
2 HI 20 0.859     150   2009   2010     5  2.62       3
3 HI 20 0.882     200   2009   2010     5  1.06       3
> df %>% filter(PLOT.n == median(PLOT.n))
# A tibble: 26 x 8
   PLOT   SIZE DENSITY SEEDYR SAMPYR   AGE SHOOTS PLOT.n
   <chr> <dbl>   <dbl>  <dbl>  <dbl> <int>  <dbl>  <int>
 1 HI 9  1.69      100   2006   2007     4  2.67      26
 2 HI 9  2.20       50   2006   2007     4  1.49      26
 3 HI 9  0.587     200   2006   2007     3  1.13      26
 4 HI 9  1.27       50   2006   2007     1  2.55      26
 5 HI 9  1.56      150   2006   2007     3  2.01      26
 6 HI 9  0.198     100   2006   2007     3  2.08      26
 7 HI 9  2.72      150   2007   2008     3  0.421     26
 8 HI 9  0.251     200   2007   2008     2  0.328     26
 9 HI 9  1.83       50   2007   2008     1  0.192     26
10 HI 9  1.97      100   2007   2008     1  0.900     26
# ... with 16 more rows
> df %>% filter(PLOT.n == max(PLOT.n))
# A tibble: 38 x 8
   PLOT   SIZE DENSITY SEEDYR SAMPYR   AGE SHOOTS PLOT.n
   <chr> <dbl>   <dbl>  <dbl>  <dbl> <int>  <dbl>  <int>
 1 HI 5  2.51      150   2006   2007     3   2.23     38
 2 HI 5  2.43      200   2006   2007     5   2.51     38
 3 HI 5  2.06      100   2006   2007     5   1.93     38
 4 HI 5  1.25      150   2006   2007     4   2.29     38
 5 HI 5  2.29      200   2006   2007     1   2.97     38
 6 HI 5  0.789     150   2006   2007     2   1.59     38
 7 HI 5  1.11      100   2007   2008     4   2.61     38
 8 HI 5  2.38      150   2007   2008     4   2.95     38
 9 HI 5  2.67      200   2007   2008     3   1.77     38
10 HI 5  2.63      100   2007   2008     1   1.90     38
# ... with 28 more rows