选择总和

Sum of selection

我需要找到蒸腾总和才能知道每天的失水量。我有一个数据表,连续几天每 5 分钟有一次植物的蒸腾作用,我想知道从午夜到一天中某个时间的水分流失情况。有谁知道我如何解决这个问题,最好是不使用 for 循环?

这就是我的数据表的样子。

这是输出 dput(resdf[sample(seq_len(nrow(resdf)), 20L, replace = TRUE), ])

structure(list(datetime = structure(c(1631829060, 1629452580, 
1633573500, 1632804540, 1632026580, 1630784460, 1629697440, 1630264740, 
1629618660, 1632762180, 1631546280, 1630984860, 1631536380, 1631223300, 
1631197680, 1633019220, 1630395600, 1633295760, 1631682840, 1629258780
), class = c("POSIXct", "POSIXt"), tzone = ""), id = structure(c(14L, 
14L, 3L, 7L, 5L, 14L, 24L, 24L, 1L, 3L, 18L, 9L, 19L, 1L, 5L, 
3L, 17L, 12L, 15L, 4L), .Label = c("A1", "A10", "A11", "A12", 
"A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "B1", "B10", 
"B11", "B12", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9"), class = "factor"), 
    date = structure(c(18885, 18858, 18906, 18897, 18888, 18873, 
    18861, 18867, 18860, 18896, 18882, 18876, 18882, 18878, 18878, 
    18899, 18869, 18902, 18884, 18856), class = "Date"), time = structure(c(0.99375, 
    0.488194444444444, 0.184027777777778, 0.284027777777778, 
    0.279861111111111, 0.903472222222222, 0.322222222222222, 
    0.888194444444444, 0.410416666666667, 0.79375, 0.720833333333333, 
    0.222916666666667, 0.60625, 0.982638888888889, 0.686111111111111, 
    0.76875, 0.402777777777778, 0.969444444444444, 0.301388888888889, 
    0.245138888888889), format = "h:m:s", class = "times"), row = c("B", 
    "B", "A", "A", "A", "B", "B", "B", "A", "A", "B", "A", "B", 
    "A", "A", "A", "B", "A", "B", "A"), column = c("10", "10", 
    "11", "4", "2", "10", "9", "9", "1", "11", "3", "6", "4", 
    "1", "2", "11", "2", "9", "11", "12"), weight = c(8540.2, 
    8550.2, 9101.9, 8148.2, 5827.8, 8530, 8635.2, 8555, 8531.5, 
    8958.5, 6926.7, 8046.2, 6738.9, 6555.9, 7094.1, 8932.3, 7919, 
    7829.1, 6708.3, 8488.1), ID = structure(c(7L, 7L, 6L, 23L, 
    11L, 7L, 14L, 14L, 10L, 6L, 13L, 17L, 4L, 10L, 11L, 6L, 22L, 
    24L, 21L, 9L), .Label = c("2", "3", "7", "8", "9", "10", 
    "11", "12", "13", "14", "15", "16", "18", "20", "21", "23", 
    "26", "27", "29", "30", "31", "32", "35", "36"), class = "factor"), 
    cultivar = structure(c(3L, 3L, 3L, 2L, 1L, 3L, 1L, 1L, 1L, 
    3L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 2L, 1L), .Label = c("Kluai Tiparot", 
    "Red Dacca", "Simili Radjah"), class = "factor"), treatment = structure(c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = "21/14 °C", class = "factor"), 
    sg = c(NA, NA, 9102.72997996696, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, 6555.90116322923, NA, NA, 7918.36797840064, 
    NA, NA, NA), trans = c(NA, NA, 0.053774096781126, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, 0.0990147946668003, NA, NA, 
    0.666644920945146, NA, NA, NA), transh = c(NA, NA, 0.645289161373512, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1.1881775360016, 
    NA, NA, 7.99973905134175, NA, NA, NA), rel_trans = c(NA, 
    NA, 5.90800786441578e-06, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, 1.51031581730655e-05, NA, NA, 8.4182967665759e-05, 
    NA, NA, NA)), row.names = c(892101L, 86789L, 1498802L, 1237108L, 
968955L, 534540L, 188878L, 368232L, 130348L, 1203252L, 807585L, 
617076L, 808671L, 666972L, 671529L, 1291962L, 418660L, 1392510L, 
863133L, 15537L), class = "data.frame")

为了能够证明我们的函数(daily_cumulative_trans,见下文)有效,我将首先 select 来自 resdf 的单个植物并复制该植物的所有行不包含 trans

的缺失值
# function to create a subset of resdf and replicate rows
# that do now have missing values for transpiration
my_subset <- \(DF, individuals, times = 2L) {
  DF <- with(DF, DF[order(id, as.Date(datetime)),])
  sub <- subset(DF, id %in% individuals)
  row_to_replicate <-
    which(!rowSums(apply(subset(sub, select = c('id', 'trans')), 2, is.na)) > 0)
  replicated_rows <- do.call(rbind,
    replicate(times, sub[row_to_replicate, ], simplify = FALSE)
  )
  sub <- rbind(sub, replicated_rows)
  out <- with(sub, sub[order(id, as.Date(datetime)),])
  return(out)
}

现在我们可以 select 种植 A11

resdf_sub <- my_subset(DF = resdf, individuals = 'A11')

最后,我们需要一个函数来计算每株植物每天的累积蒸腾量,并将其作为新列添加到 resdf。我将调用此函数 daily_cumulative_trans 和要添加的列 cumulative_trans_per_day.

daily_cumulative_trans <- \(DF) {
  DF <- with(DF, DF[order(id, as.Date(datetime)),])
  my_split <- split(DF, f = ~ DF$id + DF$date)
  out <- lapply(my_split, \(x) {
    cbind(x, cumulative_trans_per_day = cumsum(ifelse(is.na(x$trans), 0, x$trans)))
  })
  DF <- do.call(rbind, out)
  rownames(DF) <- NULL
  return(DF)
}

# execute function on plant A11
resdf_sub <- daily_cumulative_trans(DF = resdf_sub)

输出

subset(resdf_sub, id %in% 'A11',
       select = c('id', 'datetime', 'trans', 'cumulative_trans_per_day'))
# --------------------------------------------------------
   id            datetime     trans cumulative_trans_per_day
1 A11 2021-09-27 19:03:00        NA                0.0000000
2 A11 2021-09-30 18:27:00        NA                0.0000000
3 A11 2021-10-07 04:25:00 0.0537741                0.0537741
4 A11 2021-10-07 04:25:00 0.0537741                0.1075482
5 A11 2021-10-07 04:25:00 0.0537741                0.1613223

注意:如果您使用 R <4.1.0

版本,请使用 function(x) 而不是 \(x)