在分组内计算字符串列的滞后值

Calculate lag string in group

我有一个玩具选举数据集,需要标记每位候选人是否为现任者(incumbent),但用分组和 dplyr::lag 没能做出来

# Toy election data: a single city with three races (dates 3, 2, 1) and two
# candidates per race. `winner` is 1 for the first candidate listed in each
# race and 0 for the second.
race <- data.frame(
  city = rep(1, times = 6),
  date = rep(c(3, 2, 1), each = 2),
  candidate = c("A", "B", "A", "C", "D", "E"),
  winner = rep(c(1, 0), times = 3)
)

我做了一个复杂且不理想的尝试(因为之后还必须把非获胜者合并回来):

# Asker's attempt: keep only the winning row of each race, then compare each
# winner with the winner found on the next row within the same city.
# Non-winning rows are dropped, and the last row of each city gets NA because
# it has no following row to compare against.
race %>%
  group_by(city, date) %>%
  # Winner of the current race, repeated on every row of that race.
  mutate(race_winner = candidate[winner == 1]) %>%
  filter(winner == 1) %>%
  # group_by() replaces the previous (city, date) grouping.
  group_by(city) %>%
  # lead() takes the value from the following row of the city group.
  mutate(next_row_winner = lead(race_winner, n = 1, default = NA_character_)) %>%
  mutate(incumbent = ifelse(candidate == next_row_winner, 1, 0)) %>%
  select(-race_winner, -next_row_winner)

这个怎么样:

# Flag incumbents: a candidate is an incumbent (1) when they are the winner of
# the preceding race (ordered by date) in the same city, otherwise 0.
#
# Steps:
#   1. Reduce each (city, date) race to its winning candidate.
#   2. Within each city, order races by date and look up the previous winner.
#   3. Join that lookup back onto every row of `race` (winners and losers).
#   4. Compare each candidate with the previous winner; races with no
#      predecessor produce NA, which is mapped to 0.
r <- race %>%
  group_by(city, date) %>%
  summarise(win_candidate = candidate[which(winner == 1)]) %>%
  group_by(city) %>%
  arrange(date, .by_group = TRUE) %>%
  mutate(prev_win_candidate = lag(win_candidate)) %>%
  # Drop the grouping so the lookup table is a plain tibble before joining.
  ungroup() %>%
  # Explicit keys: no reliance on natural-join guessing and no join message.
  left_join(race, ., by = c("city", "date")) %>%
  mutate(
    incumbent = coalesce(as.numeric(candidate == prev_win_candidate), 0)
  ) %>%
  select(-c(win_candidate, prev_win_candidate))
  
#   city date candidate winner incumbent
# 1    1    3         A      1         1
# 2    1    3         B      0         0
# 3    1    2         A      1         0
# 4    1    2         C      0         0
# 5    1    1         D      1         0
# 6    1    1         E      0         0

先按 date 对数据排序;对于每个 city 中的每个 candidate,如果其上一行的 winner 值为 1 且相邻两次 date 之差为 1,则将 incumbent 记为 1,否则记为 0。

library(dplyr)

# For every candidate within a city, ordered by date: mark the row as
# incumbent (1L) when the candidate's previous row was a win and that row's
# date is exactly one less than the current date; otherwise 0L.
race %>%
  arrange(city, candidate, date) %>%
  group_by(city, candidate) %>%
  mutate(
    # First row of each group has no predecessor: treated as "did not win".
    won_previous = lag(winner, default = 0) == 1,
    # NA on the first row of a group; FALSE & NA is FALSE, so the flag is 0.
    consecutive = date - lag(date) == 1,
    incumbent = as.integer(won_previous & consecutive)
  ) %>%
  ungroup() %>%
  select(-won_previous, -consecutive)

#   city  date candidate winner incumbent
#  <dbl> <dbl> <chr>      <dbl>     <int>
#1     1     2 A              1         0
#2     1     3 A              1         1
#3     1     3 B              0         0
#4     1     2 C              0         0
#5     1     1 D              1         0
#6     1     1 E              0         0