多列 data.frame 作为 purrr::pmap 的列表参数用于数据框的迭代
Multi-column data.frame as list argument to purrr::pmap for iteration over a data frame
我正在尝试使用 purrr::pmap 对数据框的每一行运行一个函数，计算每一行中处于指定范围内的时间比例，但该函数的其中一个参数本身由 11 列组成。该函数在单行上运行正常，但在遍历所有行时会失败：
# Example data: one row per record, with a duration, eleven profile columns
# (d1..d11; d10 and d11 are entirely NA here) and a bottom value.
df_test <- structure(
  list(
    duration = c(268, 264, 256, 200, 296, 60, 16, 396, 400, 388),
    d1 = c(22.4, 12.4, 15.6, 21.6, 15.2, 2, 1.2, 2.4, 2, 1.6),
    d2 = c(25.2, 16, 16.8, 28.4, 16.8, 2, 1.6, 2.4, 2, 2),
    d3 = c(24.8, 18.4, 16.4, 28, 16.4, 2, 1.6, 2.4, 2, 2),
    d4 = c(21.6, 20.4, 18, 24.4, 16.4, 2, 1.6, 2.8, 2, 1.6),
    d5 = c(24.4, 20.4, 19.2, 25.2, 17.6, 1.6, 1.6, 2.8, 2, 2),
    d6 = c(24.8, 19.6, 18.8, 24, 19.2, 1.6, 1.6, 2.8, 2, 2),
    d7 = c(22.8, 19.6, 18, 23.6, 18.8, 1.6, 2, 2.8, 2.4, 1.6),
    d8 = c(17.6, 18.4, 17.6, 22.4, 20.8, 1.6, 2, 2.8, 2.4, 2),
    d9 = c(20.4, 16.4, 16.8, 20.4, 23.2, 1.6, 2, 2.8, 2.4, 2),
    d10 = rep(NA_real_, 10),
    d11 = rep(NA_real_, 10),
    bottom = c(18.02, 14.03, 14.03, 20.3, 18.95, 4.54, 4.54, 4.54, 4.54, 4.54)
  ),
  row.names = c("5", "6", "7", "9", "10", "17", "18", "19", "20", "21"),
  class = "data.frame"
)
#' Proportion of a time span spent within a band below `bottom`
#'
#' Builds a profile from the values in `d`, anchored at 0 at both the start
#' and the end of the time span, linearly interpolates it at one-second
#' intervals, and returns the number of those samples at which
#' `bottom - value` lies strictly between `mini` and `maxi`, divided by `time`.
#'
#' @param time Length of the time span; samples are taken at 0, 1, ..., `time`.
#' @param d Profile values, either a numeric vector or a one-row data frame
#'   (e.g. `df[i, 2:12]`). If the 11th value is `NA` (or absent) only the
#'   first nine values are used; otherwise the first eleven are used.
#' @param bottom Value the interpolated profile is subtracted from.
#' @param mini,maxi Exclusive lower and upper bounds on `bottom - value`.
#' @return A single number: count of in-band samples divided by `time`.
prop_fun <- function(time, d, bottom, mini = 5, maxi = 25) {
  # Accept a plain vector as well as a one-row data frame.
  d <- as.numeric(unlist(d, use.names = FALSE))

  # Same rule as before: nine values unless the 11th one is present.
  n_used <- if (is.na(d[11])) 9L else 11L
  profile <- c(0, d[seq_len(n_used)], 0)

  secs <- seq(from = 0, to = time, by = 1)
  # One evenly spaced knot per profile value. For nine values this is the
  # former `by = time / 10` grid; for eleven values it gives the 13 knots that
  # approx() needs (the old fixed 11-knot grid made that branch fail).
  knots <- seq(from = 0, to = time, length.out = length(profile))

  d_reg <- approx(knots, profile, xout = secs)$y
  d_reg_s <- bottom - d_reg

  # NA samples (e.g. from an NA `bottom`) are not counted as in-band.
  sum(d_reg_s > mini & d_reg_s < maxi, na.rm = TRUE) / time
}
# Single-row call: `d` is a one-row data frame holding columns 2:12 (d1..d11).
prop_fun(df_test$duration[1], df_test[1,2:12],df_test$bottom[1])
# [1] 0.1268657
# All rows at once: here `d` is the whole 10-row, 11-column data frame, while
# `time` and `bottom` are vectors of length 10.
argument_list <- list(time=df_test$duration, d=df_test[,2:12],bottom=df_test$bottom, mini=5, maxi=25 )
# The call below fails: the second element of the list (`d`) is reported as
# having length 11 (its number of columns) instead of 1 or 10.
try <- purrr::pmap(argument_list, prop_fun)
#Error: Element 2 of `.l` must have length 1 or 10, not 11
我不太确定单次运行时的输入与传给 purrr::pmap 的列表参数之间有什么不同。我是否需要把每一列都作为单独的参数写进 prop_fun，然后在函数内部再把它们拼接起来？
对数据框而言，length(df) 等于 ncol(df)，因此 pmap 会把 d 当作一个含 11 个元素（列）的列表，而不是 10 行；但我认为您想要的是把每一行传给您的函数。您可以用下面的方法把各行提取为一个列表：
# One list element per row of df_test, each a one-row data frame of columns
# 2:12. seq_len() keeps the list empty when df_test has no rows, whereas
# 1:nrow() would iterate over c(1, 0).
d = lapply(seq_len(nrow(df_test)), function(i) df_test[i, 2:12])
当然，使用 purrr::pmap 时，您也可以把所有参数组装到一个 data.frame 或 tibble 中，再整体传给它：
# Build one row of arguments per row of df_test and apply prop_fun to each.
# The column names match prop_fun's parameter names. The tibble is passed
# directly to pmap(), so no pipe operator (and no attached magrittr) is needed.
purrr::pmap(
  tibble::tibble(
    time = df_test$duration,
    bottom = df_test$bottom,
    # List column: one one-row data frame (columns 2:12) per row.
    # seq_len() stays empty for a zero-row df_test, unlike 1:nrow().
    d = lapply(seq_len(nrow(df_test)), function(i) df_test[i, 2:12]),
    mini = 5,
    maxi = 25
  ),
  prop_fun
)
我正在尝试使用 purrr::pmap 对数据框的每一行运行一个函数，计算每一行中处于指定范围内的时间比例，但该函数的其中一个参数本身由 11 列组成。该函数在单行上运行正常，但在遍历所有行时会失败：
# Example data: one row per record, with a duration, eleven profile columns
# (d1..d11; d10 and d11 are entirely NA here) and a bottom value.
df_test <- structure(
  list(
    duration = c(268, 264, 256, 200, 296, 60, 16, 396, 400, 388),
    d1 = c(22.4, 12.4, 15.6, 21.6, 15.2, 2, 1.2, 2.4, 2, 1.6),
    d2 = c(25.2, 16, 16.8, 28.4, 16.8, 2, 1.6, 2.4, 2, 2),
    d3 = c(24.8, 18.4, 16.4, 28, 16.4, 2, 1.6, 2.4, 2, 2),
    d4 = c(21.6, 20.4, 18, 24.4, 16.4, 2, 1.6, 2.8, 2, 1.6),
    d5 = c(24.4, 20.4, 19.2, 25.2, 17.6, 1.6, 1.6, 2.8, 2, 2),
    d6 = c(24.8, 19.6, 18.8, 24, 19.2, 1.6, 1.6, 2.8, 2, 2),
    d7 = c(22.8, 19.6, 18, 23.6, 18.8, 1.6, 2, 2.8, 2.4, 1.6),
    d8 = c(17.6, 18.4, 17.6, 22.4, 20.8, 1.6, 2, 2.8, 2.4, 2),
    d9 = c(20.4, 16.4, 16.8, 20.4, 23.2, 1.6, 2, 2.8, 2.4, 2),
    d10 = rep(NA_real_, 10),
    d11 = rep(NA_real_, 10),
    bottom = c(18.02, 14.03, 14.03, 20.3, 18.95, 4.54, 4.54, 4.54, 4.54, 4.54)
  ),
  row.names = c("5", "6", "7", "9", "10", "17", "18", "19", "20", "21"),
  class = "data.frame"
)
#' Proportion of a time span spent within a band below `bottom`
#'
#' Builds a profile from the values in `d`, anchored at 0 at both the start
#' and the end of the time span, linearly interpolates it at one-second
#' intervals, and returns the number of those samples at which
#' `bottom - value` lies strictly between `mini` and `maxi`, divided by `time`.
#'
#' @param time Length of the time span; samples are taken at 0, 1, ..., `time`.
#' @param d Profile values, either a numeric vector or a one-row data frame
#'   (e.g. `df[i, 2:12]`). If the 11th value is `NA` (or absent) only the
#'   first nine values are used; otherwise the first eleven are used.
#' @param bottom Value the interpolated profile is subtracted from.
#' @param mini,maxi Exclusive lower and upper bounds on `bottom - value`.
#' @return A single number: count of in-band samples divided by `time`.
prop_fun <- function(time, d, bottom, mini = 5, maxi = 25) {
  # Accept a plain vector as well as a one-row data frame.
  d <- as.numeric(unlist(d, use.names = FALSE))

  # Same rule as before: nine values unless the 11th one is present.
  n_used <- if (is.na(d[11])) 9L else 11L
  profile <- c(0, d[seq_len(n_used)], 0)

  secs <- seq(from = 0, to = time, by = 1)
  # One evenly spaced knot per profile value. For nine values this is the
  # former `by = time / 10` grid; for eleven values it gives the 13 knots that
  # approx() needs (the old fixed 11-knot grid made that branch fail).
  knots <- seq(from = 0, to = time, length.out = length(profile))

  d_reg <- approx(knots, profile, xout = secs)$y
  d_reg_s <- bottom - d_reg

  # NA samples (e.g. from an NA `bottom`) are not counted as in-band.
  sum(d_reg_s > mini & d_reg_s < maxi, na.rm = TRUE) / time
}
# Single-row call: `d` is a one-row data frame holding columns 2:12 (d1..d11).
prop_fun(df_test$duration[1], df_test[1,2:12],df_test$bottom[1])
# [1] 0.1268657
# All rows at once: here `d` is the whole 10-row, 11-column data frame, while
# `time` and `bottom` are vectors of length 10.
argument_list <- list(time=df_test$duration, d=df_test[,2:12],bottom=df_test$bottom, mini=5, maxi=25 )
# The call below fails: the second element of the list (`d`) is reported as
# having length 11 (its number of columns) instead of 1 or 10.
try <- purrr::pmap(argument_list, prop_fun)
#Error: Element 2 of `.l` must have length 1 or 10, not 11
我不太确定单次运行时的输入与传给 purrr::pmap 的列表参数之间有什么不同。我是否需要把每一列都作为单独的参数写进 prop_fun，然后在函数内部再把它们拼接起来？
对数据框而言，length(df) 等于 ncol(df)，因此 pmap 会把 d 当作一个含 11 个元素（列）的列表，而不是 10 行；但我认为您想要的是把每一行传给您的函数。您可以用下面的方法把各行提取为一个列表：
# One list element per row of df_test, each a one-row data frame of columns
# 2:12. seq_len() keeps the list empty when df_test has no rows, whereas
# 1:nrow() would iterate over c(1, 0).
d = lapply(seq_len(nrow(df_test)), function(i) df_test[i, 2:12])
当然，使用 purrr::pmap 时，您也可以把所有参数组装到一个 data.frame 或 tibble 中，再整体传给它：
# Build one row of arguments per row of df_test and apply prop_fun to each.
# The column names match prop_fun's parameter names. The tibble is passed
# directly to pmap(), so no pipe operator (and no attached magrittr) is needed.
purrr::pmap(
  tibble::tibble(
    time = df_test$duration,
    bottom = df_test$bottom,
    # List column: one one-row data frame (columns 2:12) per row.
    # seq_len() stays empty for a zero-row df_test, unlike 1:nrow().
    d = lapply(seq_len(nrow(df_test)), function(i) df_test[i, 2:12]),
    mini = 5,
    maxi = 25
  ),
  prop_fun
)