将 for 循环转换为函数并应用于数据帧列表
Converting for loop to function and applying to list of dataframes
我之前创建了一个 for 循环来识别水位下降的时间段。这适用于较小的连续时间序列数据。
library(tidyverse)
library(lubridate)
level_data <- c(10:4, 20:9, 16:5, rep(0, 3))
times_stamp <- seq(ymd_hms('2015-07-22 12:15:00'), ymd_hms('2015-07-22 20:30:00'), by = '15 mins')
precip_data <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 5))
maxP.neg <- .1
# Create objects for holding the start and end dates. These lists should end up
# the same length, so that each start date has a corresponding end date.
startDates <- c()
endDates <- c()
recede <- 0 # This is a switch to keep track of whether a recession period is in progress
for (i in 2:length(level_data)) { # i.e. start at the second data point
diffQ <- level_data[i] - level_data[i - 1] # Calculate difference between current and previous timestamp
if (diffQ < 0 && # If difference is negative (i.e. receding) AND
recede == 0 && # a recession period has not already begun (recede == 0) AND
precip_data <= maxP.neg) { # min. dry period criteria is met ...
startDates <- append(startDates, times_stamp[i]) # Record the start time of the recession period
recede <- 1 # Change recede to 1 to indicate a recession period has begun
} else if (diffQ >= 0 && # If the difference becomes positive
recede == 1) { # and a recession period was in progress...
endDates <- append(endDates, times_stamp[i - 1]) # Record the previous timestamp as the end date of the recession
recede <- 0 # Set recede back to 0 to show the recession period has ended
} else { # Otherwise just continue to the next data point.
next
}
}
但是,我想对有时间间隔的较大数据集执行相同的分析。我想将数据拆分为数据帧列表并使用 lapply
和自定义函数。
这是我想出来的,但我没有得到与 for 循环方法相同的输出?
diff_Q <- diff(level_data)
date_time1 <-
seq(ymd_hms('2015-07-22 12:15:00'),
ymd_hms('2015-07-22 20:15:00'),
by = '15 mins')
date_time2 <-
seq(ymd_hms('2015-07-25 08:00:00'),
ymd_hms('2015-07-25 16:00:00'),
by = '15 mins')
cum_precip <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 4))
df1 <-
data.frame(date_time1, diff_Q, cum_precip) %>% rename(date_time = date_time1)
df2 <-
data.frame(date_time2, diff_Q, cum_precip) %>% rename(date_time = date_time2)
recede_ls <- list(df1, df2)
startDates2 <- c()
endDates2 <- c()
RA.function <- function(x) {
recede <- 0
if (diff_Q < 0 &&
recede == 0 &&
cum_precip <= maxP.neg) {
startDates2 <- x$date_time
recede <- 1
} else if (diffQ >= 0 &&
recede == 1) {
endDates2 <- x$date_time[-1]
recede <-
0
} else {
next
}
}
lapply(recede_ls, RA.function)
感谢您的帮助!
library(tidyverse)
library(lubridate)
level_data <- c(10:4, 20:9, 16:5, rep(0, 3))
times_stamp <- seq(ymd_hms('2015-07-22 12:15:00'), ymd_hms('2015-07-22 20:30:00'), by = '15 mins')
precip_data <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 5))
df <- data.frame(date_time = times_stamp,
level_data = level_data,
cum_precip = precip_data)
# Precalculate `diffQ`
df$diffQ <- NA
for(i in 2:nrow(df)){
df[i,"diffQ"] <- df[i,"level_data"] - df[i-1,"level_data"]
}
df
#> date_time level_data cum_precip diffQ
#> 1 2015-07-22 12:15:00 10 0 NA
#> 2 2015-07-22 12:30:00 9 0 -1
#> 3 2015-07-22 12:45:00 8 0 -1
#> 4 2015-07-22 13:00:00 7 0 -1
#> 5 2015-07-22 13:15:00 6 0 -1
#> 6 2015-07-22 13:30:00 5 0 -1
#> 7 2015-07-22 13:45:00 4 0 -1
#> 8 2015-07-22 14:00:00 20 0 16
#> 9 2015-07-22 14:15:00 19 0 -1
#> 10 2015-07-22 14:30:00 18 0 -1
#> 11 2015-07-22 14:45:00 17 1 -1
#> 12 2015-07-22 15:00:00 16 2 -1
#> 13 2015-07-22 15:15:00 15 3 -1
#> 14 2015-07-22 15:30:00 14 4 -1
#> 15 2015-07-22 15:45:00 13 0 -1
#> 16 2015-07-22 16:00:00 12 0 -1
#> 17 2015-07-22 16:15:00 11 0 -1
#> 18 2015-07-22 16:30:00 10 0 -1
#> 19 2015-07-22 16:45:00 9 0 -1
#> 20 2015-07-22 17:00:00 16 0 7
#> 21 2015-07-22 17:15:00 15 0 -1
#> 22 2015-07-22 17:30:00 14 0 -1
#> 23 2015-07-22 17:45:00 13 0 -1
#> 24 2015-07-22 18:00:00 12 0 -1
#> 25 2015-07-22 18:15:00 11 1 -1
#> 26 2015-07-22 18:30:00 10 2 -1
#> 27 2015-07-22 18:45:00 9 3 -1
#> 28 2015-07-22 19:00:00 8 4 -1
#> 29 2015-07-22 19:15:00 7 5 -1
#> 30 2015-07-22 19:30:00 6 0 -1
#> 31 2015-07-22 19:45:00 5 0 -1
#> 32 2015-07-22 20:00:00 0 0 -5
#> 33 2015-07-22 20:15:00 0 0 0
#> 34 2015-07-22 20:30:00 0 0 0
dfs <- list(df, df)
RA.function <- function(x) {
startDates2 <- c()
endDates2 <- c()
recede <- 0
maxP.neg <- 0.1
for(i in 2:nrow(x)){
if (x[i,"diffQ"] < 0 && recede == 0 && x[i,"cum_precip"] <= maxP.neg) {
startDates2 <- c(startDates2, paste(x[i, "date_time"]))
recede <- 1
} else if (x[i,"diffQ"] >= 0 && recede == 1) {
endDates2 <- c(endDates2, paste(x[i-1,"date_time"]))
recede <- 0
}
}
return(cbind(startDates2, endDates2))
}
# Apply your function to a list of dataframes
results <- lapply(dfs, RA.function)
# Results that correspond with the first dataframe in the input list (`dfs[[1]]`)
results[[1]]
#> startDates2 endDates2
#> [1,] "2015-07-22 12:30:00" "2015-07-22 13:45:00"
#> [2,] "2015-07-22 14:15:00" "2015-07-22 16:45:00"
#> [3,] "2015-07-22 17:15:00" "2015-07-22 20:00:00"
# Results that correspond with the second dataframe in the input list (`dfs[[2]]`)
results[[2]]
#> startDates2 endDates2
#> [1,] "2015-07-22 12:30:00" "2015-07-22 13:45:00"
#> [2,] "2015-07-22 14:15:00" "2015-07-22 16:45:00"
#> [3,] "2015-07-22 17:15:00" "2015-07-22 20:00:00"
Created on 2022-01-07 by the reprex package (v2.0.1)
我之前创建了一个 for 循环来识别水位下降的时间段。这适用于较小的连续时间序列数据。
library(tidyverse)
library(lubridate)
level_data <- c(10:4, 20:9, 16:5, rep(0, 3))
times_stamp <- seq(ymd_hms('2015-07-22 12:15:00'), ymd_hms('2015-07-22 20:30:00'), by = '15 mins')
precip_data <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 5))
maxP.neg <- .1
# Create objects for holding the start and end dates. These lists should end up
# the same length, so that each start date has a corresponding end date.
startDates <- c()
endDates <- c()
recede <- 0 # This is a switch to keep track of whether a recession period is in progress
for (i in 2:length(level_data)) { # i.e. start at the second data point
diffQ <- level_data[i] - level_data[i - 1] # Calculate difference between current and previous timestamp
if (diffQ < 0 && # If difference is negative (i.e. receding) AND
recede == 0 && # a recession period has not already begun (recede == 0) AND
precip_data <= maxP.neg) { # min. dry period criteria is met ...
startDates <- append(startDates, times_stamp[i]) # Record the start time of the recession period
recede <- 1 # Change recede to 1 to indicate a recession period has begun
} else if (diffQ >= 0 && # If the difference becomes positive
recede == 1) { # and a recession period was in progress...
endDates <- append(endDates, times_stamp[i - 1]) # Record the previous timestamp as the end date of the recession
recede <- 0 # Set recede back to 0 to show the recession period has ended
} else { # Otherwise just continue to the next data point.
next
}
}
但是,我想对有时间间隔的较大数据集执行相同的分析。我想将数据拆分为数据帧列表并使用 lapply
和自定义函数。
这是我想出来的,但我没有得到与 for 循环方法相同的输出?
diff_Q <- diff(level_data)
date_time1 <-
seq(ymd_hms('2015-07-22 12:15:00'),
ymd_hms('2015-07-22 20:15:00'),
by = '15 mins')
date_time2 <-
seq(ymd_hms('2015-07-25 08:00:00'),
ymd_hms('2015-07-25 16:00:00'),
by = '15 mins')
cum_precip <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 4))
df1 <-
data.frame(date_time1, diff_Q, cum_precip) %>% rename(date_time = date_time1)
df2 <-
data.frame(date_time2, diff_Q, cum_precip) %>% rename(date_time = date_time2)
recede_ls <- list(df1, df2)
startDates2 <- c()
endDates2 <- c()
RA.function <- function(x) {
recede <- 0
if (diff_Q < 0 &&
recede == 0 &&
cum_precip <= maxP.neg) {
startDates2 <- x$date_time
recede <- 1
} else if (diffQ >= 0 &&
recede == 1) {
endDates2 <- x$date_time[-1]
recede <-
0
} else {
next
}
}
lapply(recede_ls, RA.function)
感谢您的帮助!
library(tidyverse)
library(lubridate)
level_data <- c(10:4, 20:9, 16:5, rep(0, 3))
times_stamp <- seq(ymd_hms('2015-07-22 12:15:00'), ymd_hms('2015-07-22 20:30:00'), by = '15 mins')
precip_data <- c(rep(0, 10), 1:4, rep(0, 10), 1:5, rep(0, 5))
df <- data.frame(date_time = times_stamp,
level_data = level_data,
cum_precip = precip_data)
# Precalculate `diffQ`
df$diffQ <- NA
for(i in 2:nrow(df)){
df[i,"diffQ"] <- df[i,"level_data"] - df[i-1,"level_data"]
}
df
#> date_time level_data cum_precip diffQ
#> 1 2015-07-22 12:15:00 10 0 NA
#> 2 2015-07-22 12:30:00 9 0 -1
#> 3 2015-07-22 12:45:00 8 0 -1
#> 4 2015-07-22 13:00:00 7 0 -1
#> 5 2015-07-22 13:15:00 6 0 -1
#> 6 2015-07-22 13:30:00 5 0 -1
#> 7 2015-07-22 13:45:00 4 0 -1
#> 8 2015-07-22 14:00:00 20 0 16
#> 9 2015-07-22 14:15:00 19 0 -1
#> 10 2015-07-22 14:30:00 18 0 -1
#> 11 2015-07-22 14:45:00 17 1 -1
#> 12 2015-07-22 15:00:00 16 2 -1
#> 13 2015-07-22 15:15:00 15 3 -1
#> 14 2015-07-22 15:30:00 14 4 -1
#> 15 2015-07-22 15:45:00 13 0 -1
#> 16 2015-07-22 16:00:00 12 0 -1
#> 17 2015-07-22 16:15:00 11 0 -1
#> 18 2015-07-22 16:30:00 10 0 -1
#> 19 2015-07-22 16:45:00 9 0 -1
#> 20 2015-07-22 17:00:00 16 0 7
#> 21 2015-07-22 17:15:00 15 0 -1
#> 22 2015-07-22 17:30:00 14 0 -1
#> 23 2015-07-22 17:45:00 13 0 -1
#> 24 2015-07-22 18:00:00 12 0 -1
#> 25 2015-07-22 18:15:00 11 1 -1
#> 26 2015-07-22 18:30:00 10 2 -1
#> 27 2015-07-22 18:45:00 9 3 -1
#> 28 2015-07-22 19:00:00 8 4 -1
#> 29 2015-07-22 19:15:00 7 5 -1
#> 30 2015-07-22 19:30:00 6 0 -1
#> 31 2015-07-22 19:45:00 5 0 -1
#> 32 2015-07-22 20:00:00 0 0 -5
#> 33 2015-07-22 20:15:00 0 0 0
#> 34 2015-07-22 20:30:00 0 0 0
dfs <- list(df, df)
RA.function <- function(x) {
startDates2 <- c()
endDates2 <- c()
recede <- 0
maxP.neg <- 0.1
for(i in 2:nrow(x)){
if (x[i,"diffQ"] < 0 && recede == 0 && x[i,"cum_precip"] <= maxP.neg) {
startDates2 <- c(startDates2, paste(x[i, "date_time"]))
recede <- 1
} else if (x[i,"diffQ"] >= 0 && recede == 1) {
endDates2 <- c(endDates2, paste(x[i-1,"date_time"]))
recede <- 0
}
}
return(cbind(startDates2, endDates2))
}
# Apply your function to a list of dataframes
results <- lapply(dfs, RA.function)
# Results that correspond with the first dataframe in the input list (`dfs[[1]]`)
results[[1]]
#> startDates2 endDates2
#> [1,] "2015-07-22 12:30:00" "2015-07-22 13:45:00"
#> [2,] "2015-07-22 14:15:00" "2015-07-22 16:45:00"
#> [3,] "2015-07-22 17:15:00" "2015-07-22 20:00:00"
# Results that correspond with the second dataframe in the input list (`dfs[[2]]`)
results[[2]]
#> startDates2 endDates2
#> [1,] "2015-07-22 12:30:00" "2015-07-22 13:45:00"
#> [2,] "2015-07-22 14:15:00" "2015-07-22 16:45:00"
#> [3,] "2015-07-22 17:15:00" "2015-07-22 20:00:00"
Created on 2022-01-07 by the reprex package (v2.0.1)