应用于分组数据框中的组的两个日期之间的工作日数
Number of weekdays between two dates applied to groups in a grouped dataframe
我正在尝试在分组 df 上使用 gapply 来获取项目时间条目的时间表。
下面我想得到一个列,该列将根据他们预订时间的最早日期和最晚预订时间之间的工作时间为一个人提供可用的工作时间。
library("dplyr")
library("stringr")
library("bizdays")
library("nlme")
time_df %>% dput()
structure(list(uID = c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), hours = c(39,
39, 39, 39, 19.5, 39, 31.2, 39, 39, 39, 39, 39, 39, 39, 39, 31.2,
39, 39, 39, 39, 31.2, 39, 39, 39, 31.2, 39, 39, 39, 39, 39, 39,
39, 39, 15.6, 39, 39, 39, 23.4, 39, 39, 23.4, 3.9, 3.9, 31.2,
7.8, 3.9, 3.9, 3.9, 3.9, 3.9), onset = structure(c(16090, 16111,
16097, 16083, 16125, 16104, 16076, 16118, 16139, 16216, 16209,
16181, 16167, 16160, 16174, 16188, 16146, 16153, 16265, 16251,
16223, 16244, 16258, 16230, 16237, 16307, 16363, 16328, 16349,
16314, 16335, 16321, 16356, 16391, 16384, 16370, 16398, 16412,
16377, 16405, 16433, 16139, 16160, 16153, 16209, 16251, 16272,
16230, 16342, 16314), class = "Date"), terminus = c("2014-01-24",
"2014-02-14", "2014-01-31", "2014-01-17", "2014-02-28", "2014-02-07",
"2014-01-10", "2014-02-21", "2014-03-14", "2014-05-30", "2014-05-23",
"2014-04-25", "2014-04-11", "2014-04-04", "2014-04-18", "2014-05-02",
"2014-03-21", "2014-03-28", "2014-07-18", "2014-07-04", "2014-06-06",
"2014-06-27", "2014-07-11", "2014-06-13", "2014-06-20", "2014-08-29",
"2014-10-24", "2014-09-19", "2014-10-10", "2014-09-05", "2014-09-26",
"2014-09-12", "2014-10-17", "2014-11-21", "2014-11-14", "2014-10-31",
"2014-11-28", "2014-12-12", "2014-11-07", "2014-12-05", "2014-12-31",
"2014-03-14", "2014-04-04", "2014-03-28", "2014-05-23", "2014-07-04",
"2014-07-25", "2014-06-13", "2014-10-03", "2014-09-05")), row.names = c(NA,
-50L), class = c("tbl_df", "tbl", "data.frame"))
#creating demo calendar to exclude weekends
create.calendar(name = "demo", weekdays = c("saturday","sunday"))
#function should calculate working hours between first time entry and last time entry
#ideally this will be applied to each group
timeentry = function(x){
#creates an end_date variable from further end date in the group
end_date = max(x$terminus)
#creates a start_date from earliest start date in the group
start_date = min(x$onset)
#returns weekdays between star and end
#then multiplies by 8 to get work hours
start_date %>% bizdays(end_date, cal = "demo") * 8
}
#group by uID and summarize
time_group = time_df %>% group_by(uID)
time_util = time_group %>% gapply(.,timeentry, which = c(onset,terminus))
Error in getGroups.data.frame(object, form, level) :
invalid formula for groups
我测试了我的功能以确保它按预期工作。
> time_group %>% timeentry()
[1] 2056
> time_group$terminus %>% max()
[1] "2014-12-31"
> time_group$onset %>% min()
[1] "2014-01-06"
> bizdays("2014-01-06","2014-12-31",cal = "demo") * 8
[1] 1952
我不明白他们怎么能给出不同的输出。
我知道 gapply 和我编写的函数有一些我不太理解的基础知识。 gapply 文档说我应该得到一个数据框输出。我想将该输出与我的原始数据结合起来,以便计算人员的利用率。
如有任何想法,我们将不胜感激。
library(dplyr)
library(bizdays)
#create calendar
create.calendar(name = "demo", weekdays = c("saturday","sunday"))
#calculate working hour
time_df %>%
group_by(uID) %>%
summarise(onset_earliest = min(onset),
terminus_latest = max(terminus),
working_hour = bizdays(onset_earliest, terminus_latest, cal="demo") * 8)
输出是
# A tibble: 2 x 4
uID onset_earliest terminus_latest working_hour
<int> <date> <chr> <dbl>
1 2 2014-01-06 2014-12-31 2056
2 5 2014-03-10 2014-10-03 1192
输入数据预览:
> head(time_df)
# A tibble: 6 x 4
uID hours onset terminus
<int> <dbl> <date> <chr>
1 2 39 2014-01-20 2014-01-24
2 2 39 2014-02-10 2014-02-14
3 2 39 2014-01-27 2014-01-31
4 2 39 2014-01-13 2014-01-17
5 2 19.5 2014-02-24 2014-02-28
6 2 39 2014-02-03 2014-02-07
我正在尝试在分组 df 上使用 gapply 来获取项目时间条目的时间表。
下面我想得到一个列,该列将根据他们预订时间的最早日期和最晚预订时间之间的工作时间为一个人提供可用的工作时间。
library("dplyr")
library("stringr")
library("bizdays")
library("nlme")
time_df %>% dput()
structure(list(uID = c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), hours = c(39,
39, 39, 39, 19.5, 39, 31.2, 39, 39, 39, 39, 39, 39, 39, 39, 31.2,
39, 39, 39, 39, 31.2, 39, 39, 39, 31.2, 39, 39, 39, 39, 39, 39,
39, 39, 15.6, 39, 39, 39, 23.4, 39, 39, 23.4, 3.9, 3.9, 31.2,
7.8, 3.9, 3.9, 3.9, 3.9, 3.9), onset = structure(c(16090, 16111,
16097, 16083, 16125, 16104, 16076, 16118, 16139, 16216, 16209,
16181, 16167, 16160, 16174, 16188, 16146, 16153, 16265, 16251,
16223, 16244, 16258, 16230, 16237, 16307, 16363, 16328, 16349,
16314, 16335, 16321, 16356, 16391, 16384, 16370, 16398, 16412,
16377, 16405, 16433, 16139, 16160, 16153, 16209, 16251, 16272,
16230, 16342, 16314), class = "Date"), terminus = c("2014-01-24",
"2014-02-14", "2014-01-31", "2014-01-17", "2014-02-28", "2014-02-07",
"2014-01-10", "2014-02-21", "2014-03-14", "2014-05-30", "2014-05-23",
"2014-04-25", "2014-04-11", "2014-04-04", "2014-04-18", "2014-05-02",
"2014-03-21", "2014-03-28", "2014-07-18", "2014-07-04", "2014-06-06",
"2014-06-27", "2014-07-11", "2014-06-13", "2014-06-20", "2014-08-29",
"2014-10-24", "2014-09-19", "2014-10-10", "2014-09-05", "2014-09-26",
"2014-09-12", "2014-10-17", "2014-11-21", "2014-11-14", "2014-10-31",
"2014-11-28", "2014-12-12", "2014-11-07", "2014-12-05", "2014-12-31",
"2014-03-14", "2014-04-04", "2014-03-28", "2014-05-23", "2014-07-04",
"2014-07-25", "2014-06-13", "2014-10-03", "2014-09-05")), row.names = c(NA,
-50L), class = c("tbl_df", "tbl", "data.frame"))
#creating demo calendar to exclude weekends
create.calendar(name = "demo", weekdays = c("saturday","sunday"))
#function should calculate working hours between first time entry and last time entry
#ideally this will be applied to each group
timeentry = function(x){
#creates an end_date variable from further end date in the group
end_date = max(x$terminus)
#creates a start_date from earliest start date in the group
start_date = min(x$onset)
#returns weekdays between star and end
#then multiplies by 8 to get work hours
start_date %>% bizdays(end_date, cal = "demo") * 8
}
#group by uID and summarize
time_group = time_df %>% group_by(uID)
time_util = time_group %>% gapply(.,timeentry, which = c(onset,terminus))
Error in getGroups.data.frame(object, form, level) :
invalid formula for groups
我测试了我的功能以确保它按预期工作。
> time_group %>% timeentry()
[1] 2056
> time_group$terminus %>% max()
[1] "2014-12-31"
> time_group$onset %>% min()
[1] "2014-01-06"
> bizdays("2014-01-06","2014-12-31",cal = "demo") * 8
[1] 1952
我不明白他们怎么能给出不同的输出。
我知道 gapply 和我编写的函数有一些我不太理解的基础知识。 gapply 文档说我应该得到一个数据框输出。我想将该输出与我的原始数据结合起来,以便计算人员的利用率。
如有任何想法,我们将不胜感激。
library(dplyr)
library(bizdays)
#create calendar
create.calendar(name = "demo", weekdays = c("saturday","sunday"))
#calculate working hour
time_df %>%
group_by(uID) %>%
summarise(onset_earliest = min(onset),
terminus_latest = max(terminus),
working_hour = bizdays(onset_earliest, terminus_latest, cal="demo") * 8)
输出是
# A tibble: 2 x 4
uID onset_earliest terminus_latest working_hour
<int> <date> <chr> <dbl>
1 2 2014-01-06 2014-12-31 2056
2 5 2014-03-10 2014-10-03 1192
输入数据预览:
> head(time_df)
# A tibble: 6 x 4
uID hours onset terminus
<int> <dbl> <date> <chr>
1 2 39 2014-01-20 2014-01-24
2 2 39 2014-02-10 2014-02-14
3 2 39 2014-01-27 2014-01-31
4 2 39 2014-01-13 2014-01-17
5 2 19.5 2014-02-24 2014-02-28
6 2 39 2014-02-03 2014-02-07