使用离散数据分析生物体的生长模式

Question

我有某一植物物种 200 个个体的大小数据。但大小是通过间接方式测量的：在总共 14 个月中，每月统计一次叶子的数量（离散数据）。植物的发芽、生长和死亡非常不规则，有的植物寿命长，有的植物死得快，而且发芽时间也不规律：新植物在整个研究期间不断发芽并被纳入研究。这是我的数据示例（单元格内的数字指的是叶子的数量）：

	Month.1	Month.2	Month.3	Month.4	Month.5	Month.6	Month.7
plant.1	3	21	15	-	-	-	-
plant.2	-	7	14	-	-	-	-
plant.3	-	8	12	10	-	-	-
plant.4	-	-	1	3	5	-	-
plant.5	-	3	6	18	13	4	-
.....	...	...	...	...	...	...	...

根据 Shibaprasadb 的建议，这是我的数据的一个子集，已经转换为长格式：

# Subset of the data in long format, as printed by dput(df).
# The structure() literal is assigned directly so the snippet is reproducible:
# in a fresh session `df` resolves to stats::df (the F density function), so
# `df <- dput(df)` would not create this data frame.
df <- structure(list(plant = c(1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 28L, 29L, 29L, 29L, 30L, 31L, 32L, 32L, 
32L, 32L, 32L, 62L, 62L, 63L, 64L, 65L, 65L, 65L, 65L, 66L, 67L, 
67L, 67L), month = c(4L, 5L, 6L, 7L, 8L, 4L, 7L, 8L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 7L, 6L, 7L, 8L, 6L, 7L, 4L, 5L, 6L, 7L, 8L, 
5L, 6L, 6L, 6L, 5L, 6L, 7L, 8L, 8L, 2L, 3L, 4L), time = c(0L, 
1L, 2L, 3L, 4L, 0L, 0L, 1L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 0L, 0L, 
1L, 2L, 0L, 0L, 0L, 1L, 2L, 3L, 4L, 0L, 1L, 0L, 0L, 0L, 1L, 2L, 
3L, 0L, 0L, 1L, 2L), leaves = c(6L, 18L, 9L, 24L, 6L, 12L, 6L, 
6L, 63L, 66L, 15L, 9L, 15L, 21L, 12L, 12L, 3L, 42L, 12L, 9L, 
3L, 15L, 21L, 18L, 27L, 15L, 21L, 36L, 24L, 21L, 3L, 12L, 18L, 
6L, 3L, 15L, 3L, 3L), tray = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), soil_nutrient = c(62.4, 
71, 13.6, 37.6, 13.8, 62.4, 37.6, 13.8, 47.6, 51.8, 62.4, 71, 
13.6, 37.6, 13.8, 25.4, 14.2, 25.4, 17, 14.2, 25.4, 51.6, 72.6, 
14.2, 25.4, 17, 148.2, 29.4, 29.4, 29.4, 148.2, 29.4, 103.6, 
122.4, 122.4, 116.2, 141, 117.6), watering = c(81.6, 89, 10.6, 
57.8, 1.2, 81.6, 57.8, 1.2, 46, 48.6, 81.6, 89, 10.6, 57.8, 1.2, 
57.8, 10.6, 57.8, 1.2, 10.6, 57.8, 81.6, 89, 10.6, 57.8, 1.2, 
89, 10.6, 10.6, 10.6, 89, 10.6, 57.8, 1.2, 1.2, 46, 48.6, 81.6
), UV_treatment = c("n", "n", "n", "n", "n", "n", "n", "n", "n", 
"n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", 
"n", "n", "n", "n", "n", "y", "y", "y", "n", "y", "n", "n", "n", 
"n", "n", "n")), class = "data.frame", row.names = c(NA, -38L
))

df
   plant month time leaves tray soil_nutrient watering UV_treatment
1      1     4    0      6    1          62.4     81.6            n
2      1     5    1     18    1          71.0     89.0            n
3      1     6    2      9    1          13.6     10.6            n
4      1     7    3     24    1          37.6     57.8            n
5      1     8    4      6    1          13.8      1.2            n
6      2     4    0     12    1          62.4     81.6            n
7      3     7    0      6    1          37.6     57.8            n
8      3     8    1      6    1          13.8      1.2            n
9      4     2    0     63    1          47.6     46.0            n
10     4     3    1     66    1          51.8     48.6            n
11     4     4    2     15    1          62.4     81.6            n
12     4     5    3      9    1          71.0     89.0            n
13     4     6    4     15    1          13.6     10.6            n
14     4     7    5     21    1          37.6     57.8            n
15     4     8    6     12    1          13.8      1.2            n
16    28     7    0     12    3          25.4     57.8            n
17    29     6    0      3    3          14.2     10.6            n
18    29     7    1     42    3          25.4     57.8            n
19    29     8    2     12    3          17.0      1.2            n
20    30     6    0      9    3          14.2     10.6            n
21    31     7    0      3    3          25.4     57.8            n
22    32     4    0     15    3          51.6     81.6            n
23    32     5    1     21    3          72.6     89.0            n
24    32     6    2     18    3          14.2     10.6            n
25    32     7    3     27    3          25.4     57.8            n
26    32     8    4     15    3          17.0      1.2            n
27    62     5    0     21    7         148.2     89.0            n
28    62     6    1     36    7          29.4     10.6            y
29    63     6    0     24    7          29.4     10.6            y
30    64     6    0     21    7          29.4     10.6            y
31    65     5    0      3    7         148.2     89.0            n
32    65     6    1     12    7          29.4     10.6            y
33    65     7    2     18    7         103.6     57.8            n
34    65     8    3      6    7         122.4      1.2            n
35    66     8    0      3    7         122.4      1.2            n
36    67     2    0     15    7         116.2     46.0            n
37    67     3    1      3    7         141.0     48.6            n
38    67     4    2      3    7         117.6     81.6            n

plant（植物）= 每株植物的识别编号。
month（月）= 植物发芽的月份。
time（时间）= 类似 month，但每株植物从零开始。
leaves（叶子）= 每株植物的叶子数。
tray（托盘）= 每株植物所在的托盘；总共有 10 个托盘，每个托盘包含不同类型的土壤。
soil_nutrient = 每个月每个托盘的营养总量。
watering（浇水）= 每个月每个托盘的加水量。
UV_treatment = 我们有兴趣探索其对植物影响的侵蚀性紫外线处理。

我想检查它们的生长是否存在某种模式，也就是说，它们是否先表现出叶子数量增加，然后趋于稳定，之后是突然死亡，还是在死亡前叶子数量逐渐减少。

我一直在寻找 R 中的物候分析，但到目前为止我发现的主要与气候变化、全球参数等有关。另一方面，考虑时间序列的增长分析主要集中在特定变量的长数据序列，而不是几个个体。

Answer 1

library(tidyverse)
library(ggpubr)
library(lmerTest)
#> Loading required package: lme4
#> Loading required package: Matrix
#> 
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#> 
#>     expand, pack, unpack
#> 
#> Attaching package: 'lmerTest'
#> The following object is masked from 'package:lme4':
#> 
#>     lmer
#> The following object is masked from 'package:stats':
#> 
#>     step

# Example data in wide format: one row per plant, one column per month.
# Cells hold the leaf count as text; "-" marks a month without a count.
# (Column header for month 2 corrected from the garbled `Moßnth.2`.)
data <- tibble::tribble(
  ~Plant, ~Month.1, ~Month.2, ~Month.3, ~Month.4, ~Month.5, ~Month.6, ~Month.7,
  "plant.1",      "3",     "21",      "15",      "-",      "-",      "-",      "-",
  "plant.2",      "-",      "7",      "14",      "-",      "-",      "-",      "-",
  "plant.3",      "-",      "8",      "12",     "10",      "-",      "-",      "-",
  "plant.4",      "-",      "-",       "1",      "3",      "5",      "-",      "-",
  "plant.5",      "-",      "3",       "6",     "18",     "13",      "4",      "-"
)

# Reshape to long format: one row per plant per month with a leaf count.
data <-
  data %>% 
  pivot_longer(-Plant, names_to = "Month", values_to = "n_leafs") %>%
  mutate(
    # "-" cells carry no count; as.numeric() turns them into NA
    # (this is what triggers the coercion warning).
    n_leafs = n_leafs %>% as.numeric(),
    # Keep only the trailing number of "Month.<k>" / "plant.<k>".
    Month = Month %>% str_extract("[0-9]+$") %>% as.numeric(),
    Plant = Plant %>% str_extract("[0-9]+$") %>% as.numeric()
  ) %>%
  # Drop months without a count BEFORE normalising. Otherwise every plant
  # still has a row for month 1, min(Month) is 1 for all of them, and the
  # normalisation below would only shift the calendar month by one instead
  # of aligning each plant on its own first observation.
  # NOTE: the reprex output recorded further down (19 removed rows, lmer
  # estimates) predates this filter and should be regenerated.
  drop_na(n_leafs) %>%
  # Normalization: Just count the number of months since seeding for each Plant
  group_by(Plant) %>%
  mutate(Month = Month - min(Month)) %>%
  ungroup()
#> Warning in n_leafs %>% as.numeric(): NAs introduced by coercion

#
# Scatter plot of leaf count against Month with a linear fit (stat_smooth)
# and a correlation annotation (stat_cor).
# Author's reading of the plot: the number of leaves tends to decrease over
# time, but this trend is not significant.
#

ggplot(data, aes(x = Month, y = n_leafs, colour = Plant)) +
  geom_point() +
  stat_smooth(method = "lm") +
  stat_cor()
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 19 rows containing non-finite values (stat_smooth).
#> Warning: Removed 19 rows containing non-finite values (stat_cor).
#> Warning: Removed 19 rows containing missing values (geom_point).

#
# Box plot of leaf count per Month (treated as a factor), annotated with a
# global comparison of means (stat_compare_means).
# Author's reading: the number of leaves does not differ significantly
# between the months.
#

ggplot(data, aes(x = factor(Month), y = n_leafs)) +
  geom_boxplot() +
  stat_compare_means()
#> Warning: Removed 19 rows containing non-finite values (stat_boxplot).
#> Warning: Removed 19 rows containing non-finite values (stat_compare_means).


#
# Linear mixed model: leaf count as a function of Month, with a random
# intercept per Plant to account for repeated measures on the same plant.
# Author's reading: Month is still not significant after controlling for
# the different plants.
#

summary(lmer(n_leafs ~ Month + (1 | Plant), data = data))
#> Linear mixed model fit by REML. t-tests use Satterthwaite's method [
#> lmerModLmerTest]
#> Formula: n_leafs ~ Month + (1 | Plant)
#>    Data: data
#> 
#> REML criterion at convergence: 96.8
#> 
#> Scaled residuals: 
#>     Min      1Q  Median      3Q     Max 
#> -1.1084 -0.7883 -0.2424  0.6603  1.8827 
#> 
#> Random effects:
#>  Groups   Name        Variance Std.Dev.
#>  Plant    (Intercept)  4.789   2.188   
#>  Residual             34.943   5.911   
#> Number of obs: 16, groups:  Plant, 5
#> 
#> Fixed effects:
#>             Estimate Std. Error      df t value Pr(>|t|)  
#> (Intercept)   8.2657     3.2163  9.1434   2.570   0.0298 *
#> Month         0.3186     1.2149 13.4746   0.262   0.7971  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Correlation of Fixed Effects:
#>       (Intr)
#> Month -0.831

由 reprex 包 (v2.0.1) 于 2021-09-26 创建

使用离散数据分析生物体的生长模式

Analysing the pattern in the growth of organisms using discrete data

r

time-series

discrete