根据时间分配变量
Assign variable based on time
我有一个时间序列,我有添加新变量的特定要求。
这是一些数据
# Sample input, shown as dput() output: a 10-row data.frame with a POSIXct
# column `Time` (stored as seconds since the epoch) and a character column
# `RaceNum`.
dput(df)
structure(list(Time = structure(c(1567423339.229, 1567423399.018,
1567424218.867, 1567425478.666, 1567425498.883, 1567426519.008,
1567429378.848, 1567429398.979, 1567429978.723, 1567431218.909
), tzone = "", class = c("POSIXct", "POSIXt")), RaceNum = c("1",
"1", "1", "1", "1", "1", "2", "2", "2", "2")), class = "data.frame", row.names = c(NA,
-10L))
我尝试用 case_when 或 ifelse 实现但没有成功的是:按 1:nrow 的顺序依次为 d 赋值;
但如果下一个时间序列事件发生在 1 分钟之内,则沿用前一个编号并在其后添加 b。
如您所见,编号在 RaceNum 更改时重新开始。
一旦建立了 d,我将按 RaceNum 拆分 df,然后用 cbind 重新组合在一起。
这是预期的结果
# Expected result, shown as dput() output: the same `Time` and `RaceNum`
# columns plus a character column `d` holding the labels
# ("1", "1b", "2", ...), which restart at "1" where RaceNum changes.
dput(df2)
structure(list(Time = structure(c(1567423339.229, 1567423399.018,
1567424218.867, 1567425478.666, 1567425498.883, 1567426519.008,
1567429378.848, 1567429398.979, 1567429978.723, 1567431218.909
), tzone = "", class = c("POSIXct", "POSIXt")), RaceNum = c("1",
"1", "1", "1", "1", "1", "2", "2", "2", "2"), d = c("1", "1b",
"2", "3", "3b", "4", "1", "1b", "2", "3")), class = "data.frame", row.names = c(NA,
-10L))
- 在每个 RaceNum 内创建一个变量 d:当相邻两条记录之间的时间差大于 1 分钟时,该变量递增。
- 在每个组(d)内,将 letters 中的字母粘贴到组号之后(每组第一行不加字母)。
library(dplyr)
# Label each record within a race. A record that follows the previous one by
# more than 1 minute starts a new number; a record within 1 minute keeps the
# current number and receives a letter suffix ("b", "c", ...).
# NOTE(review): assumes rows are already ordered by Time within each RaceNum.
df %>%
  group_by(RaceNum) %>%
  # Gap to the previous record in minutes (0 for the first row of each race,
  # because lag() defaults to the first Time). Each gap above 1 minute bumps
  # the running group number, which starts at 1.
  mutate(
    d = cumsum(
      difftime(Time, lag(Time, default = first(Time)), units = "mins") > 1
    ) + 1
  ) %>%
  group_by(d, .add = TRUE) %>%
  # The first row of a group gets no suffix; later rows get "b", "c", ...
  # `letters` has only 26 entries, so a 27th row in one group would receive
  # an NA suffix.
  mutate(d = paste0(d, c("", letters[-1])[row_number()])) %>%
  ungroup()
# Time RaceNum d
# <dttm> <chr> <chr>
# 1 2019-09-02 19:22:19 1 1
# 2 2019-09-02 19:23:19 1 1b
# 3 2019-09-02 19:36:58 1 2
# 4 2019-09-02 19:57:58 1 3
# 5 2019-09-02 19:58:18 1 3b
# 6 2019-09-02 20:15:19 1 4
# 7 2019-09-02 21:02:58 2 1
# 8 2019-09-02 21:03:18 2 1b
# 9 2019-09-02 21:12:58 2 2
#10 2019-09-02 21:33:38 2 3
我有一个时间序列,我有添加新变量的特定要求。
这是一些数据
# Sample input, shown as dput() output: a 10-row data.frame with a POSIXct
# column `Time` (stored as seconds since the epoch) and a character column
# `RaceNum`.
dput(df)
structure(list(Time = structure(c(1567423339.229, 1567423399.018,
1567424218.867, 1567425478.666, 1567425498.883, 1567426519.008,
1567429378.848, 1567429398.979, 1567429978.723, 1567431218.909
), tzone = "", class = c("POSIXct", "POSIXt")), RaceNum = c("1",
"1", "1", "1", "1", "1", "2", "2", "2", "2")), class = "data.frame", row.names = c(NA,
-10L))
我尝试用 case_when 或 ifelse 实现但没有成功的是:按 1:nrow 的顺序依次为 d 赋值;
但如果下一个时间序列事件发生在 1 分钟之内,则沿用前一个编号并在其后添加 b。
如您所见,编号在 RaceNum 更改时重新开始。
一旦建立了 d,我将按 RaceNum 拆分 df,然后用 cbind 重新组合在一起。
这是预期的结果
# Expected result, shown as dput() output: the same `Time` and `RaceNum`
# columns plus a character column `d` holding the labels
# ("1", "1b", "2", ...), which restart at "1" where RaceNum changes.
dput(df2)
structure(list(Time = structure(c(1567423339.229, 1567423399.018,
1567424218.867, 1567425478.666, 1567425498.883, 1567426519.008,
1567429378.848, 1567429398.979, 1567429978.723, 1567431218.909
), tzone = "", class = c("POSIXct", "POSIXt")), RaceNum = c("1",
"1", "1", "1", "1", "1", "2", "2", "2", "2"), d = c("1", "1b",
"2", "3", "3b", "4", "1", "1b", "2", "3")), class = "data.frame", row.names = c(NA,
-10L))
- 在每个 RaceNum 内创建一个变量 d:当相邻两条记录之间的时间差大于 1 分钟时,该变量递增。
- 在每个组(d)内,将 letters 中的字母粘贴到组号之后(每组第一行不加字母)。
library(dplyr)
# Label each record within a race. A record that follows the previous one by
# more than 1 minute starts a new number; a record within 1 minute keeps the
# current number and receives a letter suffix ("b", "c", ...).
# NOTE(review): assumes rows are already ordered by Time within each RaceNum.
df %>%
  group_by(RaceNum) %>%
  # Gap to the previous record in minutes (0 for the first row of each race,
  # because lag() defaults to the first Time). Each gap above 1 minute bumps
  # the running group number, which starts at 1.
  mutate(
    d = cumsum(
      difftime(Time, lag(Time, default = first(Time)), units = "mins") > 1
    ) + 1
  ) %>%
  group_by(d, .add = TRUE) %>%
  # The first row of a group gets no suffix; later rows get "b", "c", ...
  # `letters` has only 26 entries, so a 27th row in one group would receive
  # an NA suffix.
  mutate(d = paste0(d, c("", letters[-1])[row_number()])) %>%
  ungroup()
# Time RaceNum d
# <dttm> <chr> <chr>
# 1 2019-09-02 19:22:19 1 1
# 2 2019-09-02 19:23:19 1 1b
# 3 2019-09-02 19:36:58 1 2
# 4 2019-09-02 19:57:58 1 3
# 5 2019-09-02 19:58:18 1 3b
# 6 2019-09-02 20:15:19 1 4
# 7 2019-09-02 21:02:58 2 1
# 8 2019-09-02 21:03:18 2 1b
# 9 2019-09-02 21:12:58 2 2
#10 2019-09-02 21:33:38 2 3