R - 按区间汇总 data.frame
R - Summarize data.frame on an interval
我试图在每个星期五对 data.frame 上的变量求和。
随机数据框
# Example data: two IDs ("A" and "B"), each with irregularly spaced observation
# dates and a numeric variable `Var` that contains one missing value.
mydf <- data.frame(
  "ID" = c(rep("A", 6), rep("B", 5)),
  "Date" = c(
    "2017-09-08", "2017-09-10", "2017-09-13", "2017-09-15", "2017-09-20",
    "2017-09-22", "2017-08-03", "2017-08-04", "2017-08-10", "2017-08-11",
    "2017-08-12"
  ),
  "Var" = c(1, 2, 3, 4, 5, 6, 7, 8, NA, 10, 11)
)
# Convert the date strings to Date objects so weekdays() can be applied.
mydf$Date <- as.Date(mydf$Date)
# Append the weekday name of each date as a new column "WeekDay".
# NOTE(review): weekday names come from the session locale; comparisons against
# "Friday" presumably assume an English locale — confirm.
mydf <- cbind(mydf, "WeekDay" = weekdays(mydf$Date))
我想得到什么
# Desired result: one row per (ID, Friday) with the sum of `Var` accumulated
# over the rows leading up to and including that Friday.
df_ToGet <- data.frame(
  ID = rep(c("A", "B"), times = c(3, 2)),
  Date = c(
    "2017-09-08", "2017-09-15", "2017-09-22",
    "2017-08-04", "2017-08-11"
  ),
  Var_Sum = c(1, 9, 11, 15, 10)
)
我试过的
我考虑过使用 dplyr::summarize 和 aggregate,但我不知道如何正确设置 by 条件。
# Attempt from the question: group by ID, then call aggregate() inside
# summarize() with a "is Friday" indicator as the `by` condition.
# NOTE(review): the column added to `mydf` is named "WeekDay", but this line
# reads `mydf$Weekday` (lower-case "d") — confirm whether that is a typo.
# NOTE(review): `Var` here is the per-group vector while `by` is built from the
# full, ungrouped `mydf`, so their lengths may not match — verify.
mydf %>% group_by( ID ) %>% summarize( Var_Sum = aggregate( Var , sum , by=list ( (mydf$Weekday)=="Friday") ) )
我看到一些类似的问题是使用 cut 函数解决的,但这似乎是按标准周来划分区间?我对它还不是很熟悉。
我们需要使用 cumsum 创建分组变量
# Sum `Var` over each run of rows that ends on a Friday, separately per ID.
# The grouping key is built within each ID: cumsum() counts the Fridays seen so
# far and lag() shifts that count down one row, so every Friday stays in the
# same group as the rows that lead up to it.
mydf %>%
  group_by(ID) %>%
  mutate(grp = lag(cumsum(WeekDay == "Friday"), default = 0)) %>%
  group_by(ID, grp) %>%
  # Drop runs that never reach a Friday (e.g. rows after an ID's last Friday);
  # for those, `Date[WeekDay == "Friday"]` would be empty.
  filter(any(WeekDay == "Friday")) %>%
  summarise(Date = Date[WeekDay == "Friday"], Var = sum(Var, na.rm = TRUE)) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 5 x 3
# ID Date Var
# <fctr> <date> <dbl>
#1 A 2017-09-08 1
#2 A 2017-09-15 9
#3 A 2017-09-22 11
#4 B 2017-08-04 15
#5 B 2017-08-11 10
我试图在每个星期五对 data.frame 上的变量求和。
随机数据框
# Example data: two IDs ("A" and "B"), each with irregularly spaced observation
# dates and a numeric variable `Var` that contains one missing value.
mydf <- data.frame(
  "ID" = c(rep("A", 6), rep("B", 5)),
  "Date" = c(
    "2017-09-08", "2017-09-10", "2017-09-13", "2017-09-15", "2017-09-20",
    "2017-09-22", "2017-08-03", "2017-08-04", "2017-08-10", "2017-08-11",
    "2017-08-12"
  ),
  "Var" = c(1, 2, 3, 4, 5, 6, 7, 8, NA, 10, 11)
)
# Convert the date strings to Date objects so weekdays() can be applied.
mydf$Date <- as.Date(mydf$Date)
# Append the weekday name of each date as a new column "WeekDay".
# NOTE(review): weekday names come from the session locale; comparisons against
# "Friday" presumably assume an English locale — confirm.
mydf <- cbind(mydf, "WeekDay" = weekdays(mydf$Date))
我想得到什么
# Desired result: one row per (ID, Friday) with the sum of `Var` accumulated
# over the rows leading up to and including that Friday.
df_ToGet <- data.frame(
  ID = rep(c("A", "B"), times = c(3, 2)),
  Date = c(
    "2017-09-08", "2017-09-15", "2017-09-22",
    "2017-08-04", "2017-08-11"
  ),
  Var_Sum = c(1, 9, 11, 15, 10)
)
我试过的
我考虑过使用 dplyr::summarize 和 aggregate,但我不知道如何正确设置 by 条件。
# Attempt from the question: group by ID, then call aggregate() inside
# summarize() with a "is Friday" indicator as the `by` condition.
# NOTE(review): the column added to `mydf` is named "WeekDay", but this line
# reads `mydf$Weekday` (lower-case "d") — confirm whether that is a typo.
# NOTE(review): `Var` here is the per-group vector while `by` is built from the
# full, ungrouped `mydf`, so their lengths may not match — verify.
mydf %>% group_by( ID ) %>% summarize( Var_Sum = aggregate( Var , sum , by=list ( (mydf$Weekday)=="Friday") ) )
我看到一些类似的问题是使用 cut 函数解决的,但这似乎是按标准周来划分区间?我对它还不是很熟悉。
我们需要使用 cumsum 创建分组变量
# Sum `Var` over each run of rows that ends on a Friday, separately per ID.
# The grouping key is built within each ID: cumsum() counts the Fridays seen so
# far and lag() shifts that count down one row, so every Friday stays in the
# same group as the rows that lead up to it.
mydf %>%
  group_by(ID) %>%
  mutate(grp = lag(cumsum(WeekDay == "Friday"), default = 0)) %>%
  group_by(ID, grp) %>%
  # Drop runs that never reach a Friday (e.g. rows after an ID's last Friday);
  # for those, `Date[WeekDay == "Friday"]` would be empty.
  filter(any(WeekDay == "Friday")) %>%
  summarise(Date = Date[WeekDay == "Friday"], Var = sum(Var, na.rm = TRUE)) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 5 x 3
# ID Date Var
# <fctr> <date> <dbl>
#1 A 2017-09-08 1
#2 A 2017-09-15 9
#3 A 2017-09-22 11
#4 B 2017-08-04 15
#5 B 2017-08-11 10