将 for 循环转换为函数并应用于数据帧列表

Question

我之前创建了一个 for 循环来识别水位下降的时间段。这适用于较小的连续时间序列数据。

library(tidyverse)
library(lubridate)

level_data <- c(10:4, 20:9, 16:5, rep(0, 3))           
times_stamp <- seq(ymd_hms('2015-07-22 12:15:00'), ymd_hms('2015-07-22 20:30:00'), by = '15 mins')
precip_data <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 5))

maxP.neg <- .1

# Create objects for holding the start and end dates. These lists should end up 
# the same length, so that each start date has a corresponding end date.

startDates <- c()                                     
endDates <- c()

recede <- 0                                           # This is a switch to keep track of whether a recession period is in progress

for (i in 2:length(level_data)) {                     # i.e. start at the second data point
 diffQ <- level_data[i] - level_data[i - 1]          # Calculate difference between current and previous timestamp
 
 if (diffQ < 0 &&                                    # If difference is negative (i.e. receding) AND
     recede == 0 &&                                  # a recession period has not already begun (recede == 0) AND
     precip_data <= maxP.neg) {                      # min. dry period criteria is met ...
   startDates <- append(startDates, times_stamp[i])  # Record the start time of the recession period
   recede <- 1                                       # Change recede to 1 to indicate a recession period has begun
   
 } else if (diffQ >= 0 &&                            # If the difference becomes positive 
            recede == 1) {                           # and a recession period was in progress...
   endDates <- append(endDates, times_stamp[i - 1])  # Record the previous timestamp as the end date of the recession     
   recede <- 0                                       # Set recede back to 0 to show the recession period has ended                             
   
 } else {                                            # Otherwise just continue to the next data point.
   next                                     
 }
}

但是，我想对有时间间隔的较大数据集执行相同的分析。我想将数据拆分为数据帧列表并使用 lapply 和自定义函数。

这是我想出来的，但我没有得到与 for 循环方法相同的输出？

diff_Q <- diff(level_data)

date_time1 <-
  seq(ymd_hms('2015-07-22 12:15:00'),
      ymd_hms('2015-07-22 20:15:00'),
      by = '15 mins')
date_time2 <-
  seq(ymd_hms('2015-07-25 08:00:00'),
      ymd_hms('2015-07-25 16:00:00'),
      by = '15 mins')

cum_precip <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 4))

df1 <-
  data.frame(date_time1, diff_Q, cum_precip) %>% rename(date_time = date_time1)
df2 <-
  data.frame(date_time2, diff_Q, cum_precip) %>%  rename(date_time = date_time2)

recede_ls <- list(df1, df2)

startDates2 <- c()
endDates2 <- c()

RA.function <- function(x) {
  recede <- 0
  
  if (diff_Q < 0 &&
      recede == 0 &&
      cum_precip <= maxP.neg) {
    startDates2 <- x$date_time
    recede <- 1
  } else if (diffQ >= 0 &&
             recede == 1) {
    endDates2 <- x$date_time[-1]
    recede <-
      0
    
  } else {
    next
  }
}

lapply(recede_ls, RA.function)

感谢您的帮助！

Answer 1

library(tidyverse)
library(lubridate)

level_data <- c(10:4, 20:9, 16:5, rep(0, 3))           
times_stamp <- seq(ymd_hms('2015-07-22 12:15:00'), ymd_hms('2015-07-22 20:30:00'), by = '15 mins')
precip_data <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 5))

df <- data.frame(date_time = times_stamp,
                 level_data = level_data, 
                 cum_precip = precip_data)

# Precalculate `diffQ`
df$diffQ <- NA
for(i in 2:nrow(df)){
  df[i,"diffQ"] <- df[i,"level_data"] - df[i-1,"level_data"]
}

df
#>              date_time level_data cum_precip diffQ
#> 1  2015-07-22 12:15:00         10          0    NA
#> 2  2015-07-22 12:30:00          9          0    -1
#> 3  2015-07-22 12:45:00          8          0    -1
#> 4  2015-07-22 13:00:00          7          0    -1
#> 5  2015-07-22 13:15:00          6          0    -1
#> 6  2015-07-22 13:30:00          5          0    -1
#> 7  2015-07-22 13:45:00          4          0    -1
#> 8  2015-07-22 14:00:00         20          0    16
#> 9  2015-07-22 14:15:00         19          0    -1
#> 10 2015-07-22 14:30:00         18          0    -1
#> 11 2015-07-22 14:45:00         17          1    -1
#> 12 2015-07-22 15:00:00         16          2    -1
#> 13 2015-07-22 15:15:00         15          3    -1
#> 14 2015-07-22 15:30:00         14          4    -1
#> 15 2015-07-22 15:45:00         13          0    -1
#> 16 2015-07-22 16:00:00         12          0    -1
#> 17 2015-07-22 16:15:00         11          0    -1
#> 18 2015-07-22 16:30:00         10          0    -1
#> 19 2015-07-22 16:45:00          9          0    -1
#> 20 2015-07-22 17:00:00         16          0     7
#> 21 2015-07-22 17:15:00         15          0    -1
#> 22 2015-07-22 17:30:00         14          0    -1
#> 23 2015-07-22 17:45:00         13          0    -1
#> 24 2015-07-22 18:00:00         12          0    -1
#> 25 2015-07-22 18:15:00         11          1    -1
#> 26 2015-07-22 18:30:00         10          2    -1
#> 27 2015-07-22 18:45:00          9          3    -1
#> 28 2015-07-22 19:00:00          8          4    -1
#> 29 2015-07-22 19:15:00          7          5    -1
#> 30 2015-07-22 19:30:00          6          0    -1
#> 31 2015-07-22 19:45:00          5          0    -1
#> 32 2015-07-22 20:00:00          0          0    -5
#> 33 2015-07-22 20:15:00          0          0     0
#> 34 2015-07-22 20:30:00          0          0     0

dfs <- list(df, df)

RA.function <- function(x) {
  
  startDates2 <- c()
  endDates2 <- c()
  recede <- 0
  maxP.neg <- 0.1
  
  for(i in 2:nrow(x)){
    
    if (x[i,"diffQ"] < 0 && recede == 0 && x[i,"cum_precip"] <= maxP.neg) {
      startDates2 <- c(startDates2, paste(x[i, "date_time"]))
      recede <- 1
    } else if (x[i,"diffQ"] >= 0 && recede == 1) {
      endDates2 <- c(endDates2, paste(x[i-1,"date_time"]))
      recede <- 0
    }
  }
  return(cbind(startDates2, endDates2))
}

# Apply your function to a list of dataframes
results <- lapply(dfs, RA.function)

# Results that correspond with the first dataframe in the input list (`dfs[[1]]`)
results[[1]]
#>      startDates2           endDates2            
#> [1,] "2015-07-22 12:30:00" "2015-07-22 13:45:00"
#> [2,] "2015-07-22 14:15:00" "2015-07-22 16:45:00"
#> [3,] "2015-07-22 17:15:00" "2015-07-22 20:00:00"

# Results that correspond with the second dataframe in the input list (`dfs[[2]]`)
results[[2]]
#>      startDates2           endDates2            
#> [1,] "2015-07-22 12:30:00" "2015-07-22 13:45:00"
#> [2,] "2015-07-22 14:15:00" "2015-07-22 16:45:00"
#> [3,] "2015-07-22 17:15:00" "2015-07-22 20:00:00"
Created on 2022-01-07 by the reprex package (v2.0.1)

将 for 循环转换为函数并应用于数据帧列表

Converting for loop to function and applying to list of dataframes

for-loop

if-statement

r

time-series

lapply