计算组中的滞后串
Calculate lag string in group
我有一个玩具选举数据库,需要计算在职人数但不能使用分组值和 dplyr::lag
race <- data.frame(city=rep(1,6),
date=c(3,3,2,2,1,1),
candidate=c("A","B","A","C","D","E"),
winner=rep(c(1,0),3))
我做了一个不理想的复杂尝试(因为我必须合并非获胜者:
race %>%
group_by(city,date) %>%
mutate(win_candidate=candidate[winner==1]) %>%
filter(winner==1) %>%
ungroup() %>%
group_by(city) %>%
mutate(incumbent=lead(win_candidate, n=1, default = NA_character_),
incumbent=ifelse(candidate==incumbent,1,0)) %>%
select(-win_candidate)
这个怎么样:
r <- race %>%
group_by(city,date) %>%
summarise(win_candidate = candidate[which(winner== 1)]) %>%
ungroup %>%
group_by(city) %>%
arrange(date) %>%
mutate(prev_win_candidate = lag(win_candidate)) %>%
left_join(race, .) %>%
mutate(incumbent = as.numeric(candidate == prev_win_candidate),
incumbent = case_when(
is.na(incumbent) ~ 0,
TRUE ~ incumbent)) %>%
select(-c(win_candidate, prev_win_candidate))
# city date candidate winner incumbent
# 1 1 3 A 1 1
# 2 1 3 B 0 0
# 3 1 2 A 1 0
# 4 1 2 C 0 0
# 5 1 1 D 1 0
# 6 1 1 E 0 0
按date
排列数据,对于city
中的每个candidate
如果前一个winner
值为1且连续日期之间的差为1.
library(dplyr)
race %>%
arrange(city, candidate, date) %>%
group_by(city, candidate) %>%
mutate(incumbent = +(lag(winner, default = 0) == 1 & date - lag(date) == 1))%>%
ungroup
# city date candidate winner incumbent
# <dbl> <dbl> <chr> <dbl> <int>
#1 1 2 A 1 0
#2 1 3 A 1 1
#3 1 3 B 0 0
#4 1 2 C 0 0
#5 1 1 D 1 0
#6 1 1 E 0 0
我有一个玩具选举数据库,需要计算在职人数但不能使用分组值和 dplyr::lag
race <- data.frame(city=rep(1,6),
date=c(3,3,2,2,1,1),
candidate=c("A","B","A","C","D","E"),
winner=rep(c(1,0),3))
我做了一个不理想的复杂尝试(因为我必须合并非获胜者:
race %>%
group_by(city,date) %>%
mutate(win_candidate=candidate[winner==1]) %>%
filter(winner==1) %>%
ungroup() %>%
group_by(city) %>%
mutate(incumbent=lead(win_candidate, n=1, default = NA_character_),
incumbent=ifelse(candidate==incumbent,1,0)) %>%
select(-win_candidate)
这个怎么样:
r <- race %>%
group_by(city,date) %>%
summarise(win_candidate = candidate[which(winner== 1)]) %>%
ungroup %>%
group_by(city) %>%
arrange(date) %>%
mutate(prev_win_candidate = lag(win_candidate)) %>%
left_join(race, .) %>%
mutate(incumbent = as.numeric(candidate == prev_win_candidate),
incumbent = case_when(
is.na(incumbent) ~ 0,
TRUE ~ incumbent)) %>%
select(-c(win_candidate, prev_win_candidate))
# city date candidate winner incumbent
# 1 1 3 A 1 1
# 2 1 3 B 0 0
# 3 1 2 A 1 0
# 4 1 2 C 0 0
# 5 1 1 D 1 0
# 6 1 1 E 0 0
按date
排列数据,对于city
中的每个candidate
如果前一个winner
值为1且连续日期之间的差为1.
library(dplyr)
race %>%
arrange(city, candidate, date) %>%
group_by(city, candidate) %>%
mutate(incumbent = +(lag(winner, default = 0) == 1 & date - lag(date) == 1))%>%
ungroup
# city date candidate winner incumbent
# <dbl> <dbl> <chr> <dbl> <int>
#1 1 2 A 1 0
#2 1 3 A 1 1
#3 1 3 B 0 0
#4 1 2 C 0 0
#5 1 1 D 1 0
#6 1 1 E 0 0