为什么我的自定义函数在对 tibble 使用 mutate 时不起作用?
Why does my custom function not work while mutating a tibble?
我创建了一个自定义函数来为日期添加工作日。该函数依赖于以下包:
library(tidyverse)
library(lubridate)
library(tidyquant)
这是我创建的函数:
#' Add working days to a date
#'
#' Starts from `start_date + number_of_days` and, when
#' `switch_count_weekendsholidays` is `TRUE`, moves that candidate forward one
#' calendar day at a time until `tidyquant::WORKDAY_SEQUENCE()` returns at
#' least `number_of_days` workdays for the range from `start_date` to the
#' candidate.
#'
#' Not vectorised: with the workday adjustment enabled, `start_date` must have
#' length 1 (and `number_of_days` is expected to be a single number as well),
#' because the underlying `seq.Date()` call fails with
#' "'from' must be of length 1" otherwise. Apply the function per element
#' (e.g. `Vectorize()`, `mapply()`, `purrr::map2()`) when working on columns.
#'
#' @param start_date A single date, or something `as.Date()` can convert.
#' @param number_of_days Number of days to add.
#' @param switch_count_weekendsholidays If `TRUE` (default), apply the workday
#'   adjustment; if `FALSE`, return `start_date + number_of_days` unchanged.
#' @param remove_weekends,holidays Passed on to
#'   `tidyquant::WORKDAY_SEQUENCE()`.
#' @return The resulting date (class `Date`).
add_workingdays <- function(start_date,
                            number_of_days,
                            switch_count_weekendsholidays = TRUE,
                            remove_weekends = TRUE,
                            holidays = NULL) {
  start_date <- as.Date(start_date)
  if (!is.Date(start_date)) stop("add_workingdays(): start_date must be a date.", call. = FALSE)

  target_date <- start_date + number_of_days

  if (switch_count_weekendsholidays) {
    target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date, target_date, remove_weekends, holidays = holidays) %>% length()
    # Use `<`, not `!=`: the count can only grow as target_date advances, so if
    # it already exceeds number_of_days (no weekend/holiday inside the initial
    # range, or number_of_days is 0 or fractional) an inequality test would
    # never become FALSE and the loop would run forever.
    while (target_date_lenght < number_of_days) {
      target_date <- target_date + 1
      target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date, target_date, remove_weekends, holidays = holidays) %>% length()
    }
  }

  target_date
}
在以下场景中运行该函数时,它可以正常工作。
add_workingdays(start_date = '2022-04-08' %>% as.Date(), number_of_days = 5)
[1] "2022-04-14"
'2022-04-08' %>% as.Date() %>% add_workingdays(number_of_days = 5)
[1] "2022-04-14"
但是当我尝试在 tibble 中的 mutate
函数中使用它时,我收到了我不理解的错误消息。
我使用了下面的代码,它在最后给出了错误:
tibble(
+ dates = rep('2022-04-08' %>% as.Date()), #) seq.Date(from = '2022-04-08' %>% as.Date(), by = 'days', length.out = 5),
+ days_to_add = rep(10:5)
+ ) %>%
+ print() %>%
+ mutate(
+ target_date = add_workingdays(start_date = dates, number_of_days = days_to_add)
+ )
# A tibble: 6 x 2
dates days_to_add
<date> <int>
1 2022-04-08 10
2 2022-04-08 9
3 2022-04-08 8
4 2022-04-08 7
5 2022-04-08 6
6 2022-04-08 5
Error in `mutate()`:
! Problem while computing `target_date =
add_workingdays(start_date = dates, number_of_days =
days_to_add)`.
Caused by error in `seq.Date()`:
! 'from' must be of length 1
Run `rlang::last_error()` to see where the error occurred.
任何人都可以向我解释在 mutate 函数中使用此自定义函数时我做错了什么吗?
您的函数必须先用 Vectorize 向量化,或者借助 purrr::map 或 lapply 把它单独应用于每个元素:
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(tidyquant)
#> Loading required package: PerformanceAnalytics
#> Loading required package: xts
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
#>
#> Attaching package: 'PerformanceAnalytics'
#> The following object is masked from 'package:graphics':
#>
#> legend
#> Loading required package: quantmod
#> Loading required package: TTR
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
#> ══ Need to Learn tidyquant? ════════════════════════════════════════════════════
#> Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
#> </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
#' Add working days to a date
#'
#' Starts from `start_date + number_of_days` and, when
#' `switch_count_weekendsholidays` is `TRUE`, moves that candidate forward one
#' calendar day at a time until `tidyquant::WORKDAY_SEQUENCE()` returns at
#' least `number_of_days` workdays for the range from `start_date` to the
#' candidate.
#'
#' Not vectorised: with the workday adjustment enabled, `start_date` must have
#' length 1 (and `number_of_days` is expected to be a single number as well),
#' because the underlying `seq.Date()` call fails with
#' "'from' must be of length 1" otherwise. Apply the function per element
#' (e.g. `Vectorize()`, `mapply()`, `purrr::map2()`) when working on columns.
#'
#' @param start_date A single date, or something `as.Date()` can convert.
#' @param number_of_days Number of days to add.
#' @param switch_count_weekendsholidays If `TRUE` (default), apply the workday
#'   adjustment; if `FALSE`, return `start_date + number_of_days` unchanged.
#' @param remove_weekends,holidays Passed on to
#'   `tidyquant::WORKDAY_SEQUENCE()`.
#' @return The resulting date (class `Date`).
add_workingdays <- function(start_date,
                            number_of_days,
                            switch_count_weekendsholidays = TRUE,
                            remove_weekends = TRUE,
                            holidays = NULL) {
  start_date <- as.Date(start_date)
  if (!is.Date(start_date)) stop("add_workingdays(): start_date must be a date.", call. = FALSE)

  target_date <- start_date + number_of_days

  if (switch_count_weekendsholidays) {
    target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date, target_date, remove_weekends, holidays = holidays) %>% length()
    # Use `<`, not `!=`: the count can only grow as target_date advances, so if
    # it already exceeds number_of_days (no weekend/holiday inside the initial
    # range, or number_of_days is 0 or fractional) an inequality test would
    # never become FALSE and the loop would run forever.
    while (target_date_lenght < number_of_days) {
      target_date <- target_date + 1
      target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date, target_date, remove_weekends, holidays = holidays) %>% length()
    }
  }

  target_date
}
add_workingdays(start_date = "2022-04-08" %>% as.Date(), number_of_days = 5)
#> [1] "2022-04-14"
c("2022-04-08", "2022-04-09") %>%
as.Date() %>%
add_workingdays(number_of_days = 5)
#> Error in seq.Date(from = AS_DATE(start_date), to = AS_DATE(end_date), : 'from' must be of length 1
# Wrap the scalar-only function with Vectorize() so that it is called once per
# element of its arguments.
# NOTE: the results printed below are bare numbers (19096 is the number of days
# from 1970-01-01 to 2022-04-14), i.e. the Date class is lost when the
# per-element results are combined; convert back with
# as.Date(x, origin = "1970-01-01") if dates are needed.
add_workingdays <- Vectorize(add_workingdays)
c("2022-04-08", "2022-04-09") %>%
as.Date() %>%
add_workingdays(number_of_days = 5)
#> [1] 19096 19097
tibble(
dates = rep("2022-04-08" %>% as.Date()), # ) seq.Date(from = '2022-04-08' %>% as.Date(), by = 'days', length.out = 5),
days_to_add = rep(10:5)
) %>%
mutate(
target_date = add_workingdays(start_date = dates, number_of_days = days_to_add)
)
#> # A tibble: 6 × 3
#> dates days_to_add target_date
#> <date> <int> <dbl>
#> 1 2022-04-08 10 19103
#> 2 2022-04-08 9 19102
#> 3 2022-04-08 8 19101
#> 4 2022-04-08 7 19100
#> 5 2022-04-08 6 19097
#> 6 2022-04-08 5 19096
由 reprex package (v2.0.0)
于 2022-04-13 创建
该错误与 mutate 无关,而是因为传给 tidyquant::WORKDAY_SEQUENCE 函数的参数不符合要求。如果在控制台中打印该函数(不带括号)来查看其定义,您将得到:
tidyquant::WORKDAY_SEQUENCE #with no parens
#function (start_date, end_date, remove_weekends = TRUE, holidays = NULL)
#{
# day_sequence <- DATE_SEQUENCE(start_date, end_date, by = "day")
# ret_tbl <- tibble::tibble(day_sequence = day_sequence) %>%
# ...#and more
这说明该函数内部调用了 DATE_SEQUENCE 函数。同样,查看 DATE_SEQUENCE 的定义会发现它调用了 seq.Date 函数,而 seq.Date 要求 from 参数只能是单个日期。例如:
seq.Date(from = as.Date("2020-01-01"), to = as.Date("2020-01-03"), by = 'days')
#[1] "2020-01-01" "2020-01-02" "2020-01-03"
WORKDAY_SEQUENCE("2020-01-01", "2020-01-03")
#[1] "2020-01-01" "2020-01-02" "2020-01-03"
如果您向 from
输入多个日期数据,您会得到相同的错误:
seq.Date(from = c(as.Date("2020-01-01"), as.Date("2020-01-02")), to = as.Date("2020-01-03"), by = 'days')
#Error in seq.Date(c(as.Date("2020-01-01"), as.Date("2020-01-02")), #as.Date("2020-01-03"), :
# 'from' must be of length 1
WORKDAY_SEQUENCE(c("2020-01-01", "2020-01-02"), "2020-01-03")
#Error in seq.Date(from = AS_DATE(start_date), to = AS_DATE(end_date), :
# 'from' must be of length 1
为避免此错误,当您要把一整列日期作为 start_date 传入时,必须将 WORKDAY_SEQUENCE 函数向量化,也就是对列中的每个日期分别调用该函数。正如 @danloo 所示,您可以使用 Vectorize;也可以使用 *apply 系列函数。
案例 1:start_date
有多个日期,end_date
有一个日期。
在这种情况下,您可以使用 lapply
或 sapply
.
lapply(c("2020-01-01", "2020-01-02"), WORKDAY_SEQUENCE, end_date = "2020-01-03")
#[[1]]
#[1] "2020-01-01" "2020-01-02" "2020-01-03"
#[[2]]
#[1] "2020-01-02" "2020-01-03"
您还需要对每个结果分别调用 length() 函数。由于 lapply 返回的是一个列表,如果您需要得到一个向量以便在数据框中创建列,可以使用 unlist():
c("2020-01-01", "2020-01-02") %>%
lapply(WORKDAY_SEQUENCE, end_date = "2020-01-03") %>%
lapply(length) %>%
unlist()
#[1] 3 2
因此,如果您想采用上面的 lapply
方法,需要把您函数中的下面这几行替换掉(原代码在前,“至”之后是替换后的代码):
target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date,
target_date, remove_weekends,
holidays = holidays) %>%
length()
至
# Count the workdays for every element of start_date against one shared end
# date: lapply() calls WORKDAY_SEQUENCE() once per start date, length() is
# then taken of each returned sequence, and unlist() flattens the list of
# counts into a plain vector.
# NOTE(review): target_date is handed over whole as end_date, so this
# presumably only works while target_date is a single date — confirm.
target_date_length <- start_date %>%
lapply(tidyquant::WORKDAY_SEQUENCE,
end_date = target_date,
remove_weekends = remove_weekends,
holidays = holidays) %>%
lapply(length) %>%
unlist()
案例 2:start_date
和 end_date
都包含多个日期。
在这种情况下,您可以使用 Map
或 mapply:
# Count workdays pairwise: element i of start_date is matched with element i
# of end_date, and one count is produced per pair.
target_date_length <- mapply(
  tidyquant::WORKDAY_SEQUENCE,
  start_date = start_date,
  end_date = end_date,
  # Options shared by every pair belong in MoreArgs. Passed through `...`
  # they would be iterated element by element like the dates, and a
  # zero-length value such as holidays = NULL makes mapply() fail.
  MoreArgs = list(remove_weekends = remove_weekends, holidays = holidays),
  # Always return a list of sequences; the default simplification can turn
  # equal-length sequences into a matrix, which would break the per-sequence
  # length() below.
  SIMPLIFY = FALSE
) %>%
  lapply(length) %>%
  unlist()
我创建了一个自定义函数来为日期添加工作日。该函数依赖于以下包:
library(tidyverse)
library(lubridate)
library(tidyquant)
这是我创建的函数:
#' Add working days to a date
#'
#' Starts from `start_date + number_of_days` and, when
#' `switch_count_weekendsholidays` is `TRUE`, moves that candidate forward one
#' calendar day at a time until `tidyquant::WORKDAY_SEQUENCE()` returns at
#' least `number_of_days` workdays for the range from `start_date` to the
#' candidate.
#'
#' Not vectorised: with the workday adjustment enabled, `start_date` must have
#' length 1 (and `number_of_days` is expected to be a single number as well),
#' because the underlying `seq.Date()` call fails with
#' "'from' must be of length 1" otherwise. Apply the function per element
#' (e.g. `Vectorize()`, `mapply()`, `purrr::map2()`) when working on columns.
#'
#' @param start_date A single date, or something `as.Date()` can convert.
#' @param number_of_days Number of days to add.
#' @param switch_count_weekendsholidays If `TRUE` (default), apply the workday
#'   adjustment; if `FALSE`, return `start_date + number_of_days` unchanged.
#' @param remove_weekends,holidays Passed on to
#'   `tidyquant::WORKDAY_SEQUENCE()`.
#' @return The resulting date (class `Date`).
add_workingdays <- function(start_date,
                            number_of_days,
                            switch_count_weekendsholidays = TRUE,
                            remove_weekends = TRUE,
                            holidays = NULL) {
  start_date <- as.Date(start_date)
  if (!is.Date(start_date)) stop("add_workingdays(): start_date must be a date.", call. = FALSE)

  target_date <- start_date + number_of_days

  if (switch_count_weekendsholidays) {
    target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date, target_date, remove_weekends, holidays = holidays) %>% length()
    # Use `<`, not `!=`: the count can only grow as target_date advances, so if
    # it already exceeds number_of_days (no weekend/holiday inside the initial
    # range, or number_of_days is 0 or fractional) an inequality test would
    # never become FALSE and the loop would run forever.
    while (target_date_lenght < number_of_days) {
      target_date <- target_date + 1
      target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date, target_date, remove_weekends, holidays = holidays) %>% length()
    }
  }

  target_date
}
在以下场景中运行该函数时,它可以正常工作。
add_workingdays(start_date = '2022-04-08' %>% as.Date(), number_of_days = 5)
[1] "2022-04-14"
'2022-04-08' %>% as.Date() %>% add_workingdays(number_of_days = 5)
[1] "2022-04-14"
但是当我尝试在 tibble 中的 mutate
函数中使用它时,我收到了我不理解的错误消息。
我使用了下面的代码,它在最后给出了错误:
tibble(
+ dates = rep('2022-04-08' %>% as.Date()), #) seq.Date(from = '2022-04-08' %>% as.Date(), by = 'days', length.out = 5),
+ days_to_add = rep(10:5)
+ ) %>%
+ print() %>%
+ mutate(
+ target_date = add_workingdays(start_date = dates, number_of_days = days_to_add)
+ )
# A tibble: 6 x 2
dates days_to_add
<date> <int>
1 2022-04-08 10
2 2022-04-08 9
3 2022-04-08 8
4 2022-04-08 7
5 2022-04-08 6
6 2022-04-08 5
Error in `mutate()`:
! Problem while computing `target_date =
add_workingdays(start_date = dates, number_of_days =
days_to_add)`.
Caused by error in `seq.Date()`:
! 'from' must be of length 1
Run `rlang::last_error()` to see where the error occurred.
任何人都可以向我解释在 mutate 函数中使用此自定义函数时我做错了什么吗?
您的函数必须先用 Vectorize 向量化,或者借助 purrr::map 或 lapply 把它单独应用于每个元素:
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(tidyquant)
#> Loading required package: PerformanceAnalytics
#> Loading required package: xts
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
#>
#> Attaching package: 'PerformanceAnalytics'
#> The following object is masked from 'package:graphics':
#>
#> legend
#> Loading required package: quantmod
#> Loading required package: TTR
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
#> ══ Need to Learn tidyquant? ════════════════════════════════════════════════════
#> Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
#> </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
#' Add working days to a date
#'
#' Starts from `start_date + number_of_days` and, when
#' `switch_count_weekendsholidays` is `TRUE`, moves that candidate forward one
#' calendar day at a time until `tidyquant::WORKDAY_SEQUENCE()` returns at
#' least `number_of_days` workdays for the range from `start_date` to the
#' candidate.
#'
#' Not vectorised: with the workday adjustment enabled, `start_date` must have
#' length 1 (and `number_of_days` is expected to be a single number as well),
#' because the underlying `seq.Date()` call fails with
#' "'from' must be of length 1" otherwise. Apply the function per element
#' (e.g. `Vectorize()`, `mapply()`, `purrr::map2()`) when working on columns.
#'
#' @param start_date A single date, or something `as.Date()` can convert.
#' @param number_of_days Number of days to add.
#' @param switch_count_weekendsholidays If `TRUE` (default), apply the workday
#'   adjustment; if `FALSE`, return `start_date + number_of_days` unchanged.
#' @param remove_weekends,holidays Passed on to
#'   `tidyquant::WORKDAY_SEQUENCE()`.
#' @return The resulting date (class `Date`).
add_workingdays <- function(start_date,
                            number_of_days,
                            switch_count_weekendsholidays = TRUE,
                            remove_weekends = TRUE,
                            holidays = NULL) {
  start_date <- as.Date(start_date)
  if (!is.Date(start_date)) stop("add_workingdays(): start_date must be a date.", call. = FALSE)

  target_date <- start_date + number_of_days

  if (switch_count_weekendsholidays) {
    target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date, target_date, remove_weekends, holidays = holidays) %>% length()
    # Use `<`, not `!=`: the count can only grow as target_date advances, so if
    # it already exceeds number_of_days (no weekend/holiday inside the initial
    # range, or number_of_days is 0 or fractional) an inequality test would
    # never become FALSE and the loop would run forever.
    while (target_date_lenght < number_of_days) {
      target_date <- target_date + 1
      target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date, target_date, remove_weekends, holidays = holidays) %>% length()
    }
  }

  target_date
}
add_workingdays(start_date = "2022-04-08" %>% as.Date(), number_of_days = 5)
#> [1] "2022-04-14"
c("2022-04-08", "2022-04-09") %>%
as.Date() %>%
add_workingdays(number_of_days = 5)
#> Error in seq.Date(from = AS_DATE(start_date), to = AS_DATE(end_date), : 'from' must be of length 1
# Wrap the scalar-only function with Vectorize() so that it is called once per
# element of its arguments.
# NOTE: the results printed below are bare numbers (19096 is the number of days
# from 1970-01-01 to 2022-04-14), i.e. the Date class is lost when the
# per-element results are combined; convert back with
# as.Date(x, origin = "1970-01-01") if dates are needed.
add_workingdays <- Vectorize(add_workingdays)
c("2022-04-08", "2022-04-09") %>%
as.Date() %>%
add_workingdays(number_of_days = 5)
#> [1] 19096 19097
tibble(
dates = rep("2022-04-08" %>% as.Date()), # ) seq.Date(from = '2022-04-08' %>% as.Date(), by = 'days', length.out = 5),
days_to_add = rep(10:5)
) %>%
mutate(
target_date = add_workingdays(start_date = dates, number_of_days = days_to_add)
)
#> # A tibble: 6 × 3
#> dates days_to_add target_date
#> <date> <int> <dbl>
#> 1 2022-04-08 10 19103
#> 2 2022-04-08 9 19102
#> 3 2022-04-08 8 19101
#> 4 2022-04-08 7 19100
#> 5 2022-04-08 6 19097
#> 6 2022-04-08 5 19096
由 reprex package (v2.0.0)
于 2022-04-13 创建

该错误与 mutate 无关,而是因为传给 tidyquant::WORKDAY_SEQUENCE 函数的参数不符合要求。如果在控制台中打印该函数(不带括号)来查看其定义,您将得到:
tidyquant::WORKDAY_SEQUENCE #with no parens
#function (start_date, end_date, remove_weekends = TRUE, holidays = NULL)
#{
# day_sequence <- DATE_SEQUENCE(start_date, end_date, by = "day")
# ret_tbl <- tibble::tibble(day_sequence = day_sequence) %>%
# ...#and more
这说明该函数内部调用了 DATE_SEQUENCE 函数。同样,查看 DATE_SEQUENCE 的定义会发现它调用了 seq.Date 函数,而 seq.Date 要求 from 参数只能是单个日期。例如:
seq.Date(from = as.Date("2020-01-01"), to = as.Date("2020-01-03"), by = 'days')
#[1] "2020-01-01" "2020-01-02" "2020-01-03"
WORKDAY_SEQUENCE("2020-01-01", "2020-01-03")
#[1] "2020-01-01" "2020-01-02" "2020-01-03"
如果您向 from
输入多个日期数据,您会得到相同的错误:
seq.Date(from = c(as.Date("2020-01-01"), as.Date("2020-01-02")), to = as.Date("2020-01-03"), by = 'days')
#Error in seq.Date(c(as.Date("2020-01-01"), as.Date("2020-01-02")), #as.Date("2020-01-03"), :
# 'from' must be of length 1
WORKDAY_SEQUENCE(c("2020-01-01", "2020-01-02"), "2020-01-03")
#Error in seq.Date(from = AS_DATE(start_date), to = AS_DATE(end_date), :
# 'from' must be of length 1
为避免此错误,当您要把一整列日期作为 start_date 传入时,必须将 WORKDAY_SEQUENCE 函数向量化,也就是对列中的每个日期分别调用该函数。正如 @danloo 所示,您可以使用 Vectorize;也可以使用 *apply 系列函数。
案例 1:start_date
有多个日期,end_date
有一个日期。
在这种情况下,您可以使用 lapply
或 sapply
.
lapply(c("2020-01-01", "2020-01-02"), WORKDAY_SEQUENCE, end_date = "2020-01-03")
#[[1]]
#[1] "2020-01-01" "2020-01-02" "2020-01-03"
#[[2]]
#[1] "2020-01-02" "2020-01-03"
您还需要对每个结果分别调用 length() 函数。由于 lapply 返回的是一个列表,如果您需要得到一个向量以便在数据框中创建列,可以使用 unlist():
c("2020-01-01", "2020-01-02") %>%
lapply(WORKDAY_SEQUENCE, end_date = "2020-01-03") %>%
lapply(length) %>%
unlist()
#[1] 3 2
因此,如果您想采用上面的 lapply
方法,需要把您函数中的下面这几行替换掉(原代码在前,“至”之后是替换后的代码):
target_date_lenght <- tidyquant::WORKDAY_SEQUENCE(start_date,
target_date, remove_weekends,
holidays = holidays) %>%
length()
至
# Count the workdays for every element of start_date against one shared end
# date: lapply() calls WORKDAY_SEQUENCE() once per start date, length() is
# then taken of each returned sequence, and unlist() flattens the list of
# counts into a plain vector.
# NOTE(review): target_date is handed over whole as end_date, so this
# presumably only works while target_date is a single date — confirm.
target_date_length <- start_date %>%
lapply(tidyquant::WORKDAY_SEQUENCE,
end_date = target_date,
remove_weekends = remove_weekends,
holidays = holidays) %>%
lapply(length) %>%
unlist()
案例 2:start_date
和 end_date
都包含多个日期。
在这种情况下,您可以使用 Map
或 mapply:
# Count workdays pairwise: element i of start_date is matched with element i
# of end_date, and one count is produced per pair.
target_date_length <- mapply(
  tidyquant::WORKDAY_SEQUENCE,
  start_date = start_date,
  end_date = end_date,
  # Options shared by every pair belong in MoreArgs. Passed through `...`
  # they would be iterated element by element like the dates, and a
  # zero-length value such as holidays = NULL makes mapply() fail.
  MoreArgs = list(remove_weekends = remove_weekends, holidays = holidays),
  # Always return a list of sequences; the default simplification can turn
  # equal-length sequences into a matrix, which would break the per-sequence
  # length() below.
  SIMPLIFY = FALSE
) %>%
  lapply(length) %>%
  unlist()