是否可以 dplyr::lag 前一年而不是前一行?
Is it possible to dplyr::lag the previous year and not the previous row?
这是我的一小部分数据
# Sample data: one row per observation. Year 2017 occurs three times for
# Id "J633". All columns are character; the "NA" entries in RepYear are
# literal strings, not missing values.
data <- data.frame(
  Year = c(
    as.character(1994:2001),
    "2004",
    as.character(2006:2014),
    rep("2017", 3),
    "2018"
  ),
  RepYear = c(rep("NA", 18), "2", "3", "1", "NA"),
  Id = rep(c("A013", "J633"), times = c(8, 14))
)
Year RepYear Id
1 1994 NA A013
2 1995 NA A013
3 1996 NA A013
4 1997 NA A013
5 1998 NA A013
6 1999 NA A013
7 2000 NA A013
8 2001 NA A013
9 2004 NA J633
10 2006 NA J633
11 2007 NA J633
12 2008 NA J633
13 2009 NA J633
14 2010 NA J633
15 2011 NA J633
16 2012 NA J633
17 2013 NA J633
18 2014 NA J633
19 2017 2 J633
20 2017 3 J633
21 2017 1 J633
22 2018 NA J633
这就是我想用 dplyr::lag 完成的结果:
Year RepYear Id PreviousYear
1 1994 NA A013 <NA>
2 1995 NA A013 1994
3 1996 NA A013 1995
4 1997 NA A013 1996
5 1998 NA A013 1997
6 1999 NA A013 1998
7 2000 NA A013 1999
8 2001 NA A013 2000
9 2004 NA J633 <NA>
10 2006 NA J633 2004
11 2007 NA J633 2006
12 2008 NA J633 2007
13 2009 NA J633 2008
14 2010 NA J633 2009
15 2011 NA J633 2010
16 2012 NA J633 2011
17 2013 NA J633 2012
18 2014 NA J633 2013
19 2017 2 J633 2014
20 2017 3 J633 2014
21 2017 1 J633 2014
22 2018 NA J633 2017
问题出在年份重复的情况(如第 20 行和第 21 行):对于这些行,我希望 PreviousYear = 2014,而不是上一行的 2017。
这是我试过的:
# Attempt: sort by Id and Year, take the row-wise lag of Year within each Id,
# then patch rows whose Year equals the previous row's Year by copying the
# previous row's PreviousYear.
data %>% arrange(Id, Year) %>%
group_by(Id) %>%
mutate(PreviousYear = lag(Year, 1)) %>%
mutate(PreviousYear = if_else(Year == lag(Year), lag(PreviousYear, 1), PreviousYear)) %>% # Fix issue created by repeated year
mutate(PreviousYear = if_else(Year == lag(Year), lag(PreviousYear, 1), PreviousYear)) # Same fix again: each pass repairs only one more repeated row
但这非常笨拙,因为显然我需要把 mutate 调用重复两次才能修复这两行……
提前致谢
我.
一种方法是仅保留 Id 和 Year 的唯一值,然后取 lag。然后,您可以将这个滞后的数据帧连接到原始数据帧以保持行数相同。
library(dplyr)

# Lag over the distinct (Id, Year) pairs instead of over rows, so every row
# of a repeated year receives the same previous year. Joining back onto
# `data` restores one output row per original row.
data %>%
  distinct(Id, Year) %>%
  arrange(Id, Year) %>% # lag() follows row order, so sort explicitly
  group_by(Id) %>%
  mutate(prev_year = lag(Year)) %>%
  ungroup() %>% # do not leak the grouping into later steps
  left_join(data, by = c("Year", "Id"))
# Year Id prev_year RepYear
#1 1994 A013 <NA> NA
#2 1995 A013 1994 NA
#3 1996 A013 1995 NA
#4 1997 A013 1996 NA
#5 1998 A013 1997 NA
#6 1999 A013 1998 NA
#7 2000 A013 1999 NA
#8 2001 A013 2000 NA
#9 2004 J633 <NA> NA
#10 2006 J633 2004 NA
#11 2007 J633 2006 NA
#12 2008 J633 2007 NA
#13 2009 J633 2008 NA
#14 2010 J633 2009 NA
#15 2011 J633 2010 NA
#16 2012 J633 2011 NA
#17 2013 J633 2012 NA
#18 2014 J633 2013 NA
#19 2017 J633 2014 2
#20 2017 J633 2014 3
#21 2017 J633 2014 1
#22 2018 J633 2017 NA
附加解决方案
library(dplyr)
library(tidyr) # provides unnest()

# Nest the rows of each (Id, Year) pair into a list-column (group_nest()
# names it `data` by default), lag Year across the resulting
# one-row-per-year table within each Id, then unnest to recover the
# original rows. The input is the question's `data` object.
data %>%
  group_nest(Id, Year) %>%
  group_by(Id) %>%
  mutate(py = lag(Year)) %>%
  unnest(data) %>%
  ungroup()
# A tibble: 22 x 4
Id Year RepYear py
<chr> <chr> <chr> <chr>
1 A013 1994 NA NA
2 A013 1995 NA 1994
3 A013 1996 NA 1995
4 A013 1997 NA 1996
5 A013 1998 NA 1997
6 A013 1999 NA 1998
7 A013 2000 NA 1999
8 A013 2001 NA 2000
9 J633 2004 NA NA
10 J633 2006 NA 2004
# ... with 12 more rows
这是我的一小部分数据
# Sample data: one row per observation. Year 2017 occurs three times for
# Id "J633". All columns are character; the "NA" entries in RepYear are
# literal strings, not missing values.
data <- data.frame(
  Year = c(
    as.character(1994:2001),
    "2004",
    as.character(2006:2014),
    rep("2017", 3),
    "2018"
  ),
  RepYear = c(rep("NA", 18), "2", "3", "1", "NA"),
  Id = rep(c("A013", "J633"), times = c(8, 14))
)
Year RepYear Id
1 1994 NA A013
2 1995 NA A013
3 1996 NA A013
4 1997 NA A013
5 1998 NA A013
6 1999 NA A013
7 2000 NA A013
8 2001 NA A013
9 2004 NA J633
10 2006 NA J633
11 2007 NA J633
12 2008 NA J633
13 2009 NA J633
14 2010 NA J633
15 2011 NA J633
16 2012 NA J633
17 2013 NA J633
18 2014 NA J633
19 2017 2 J633
20 2017 3 J633
21 2017 1 J633
22 2018 NA J633
这就是我想用 dplyr::lag 完成的结果:
Year RepYear Id PreviousYear
1 1994 NA A013 <NA>
2 1995 NA A013 1994
3 1996 NA A013 1995
4 1997 NA A013 1996
5 1998 NA A013 1997
6 1999 NA A013 1998
7 2000 NA A013 1999
8 2001 NA A013 2000
9 2004 NA J633 <NA>
10 2006 NA J633 2004
11 2007 NA J633 2006
12 2008 NA J633 2007
13 2009 NA J633 2008
14 2010 NA J633 2009
15 2011 NA J633 2010
16 2012 NA J633 2011
17 2013 NA J633 2012
18 2014 NA J633 2013
19 2017 2 J633 2014
20 2017 3 J633 2014
21 2017 1 J633 2014
22 2018 NA J633 2017
问题出在年份重复的情况(如第 20 行和第 21 行):对于这些行,我希望 PreviousYear = 2014,而不是上一行的 2017。
这是我试过的:
# Attempt: sort by Id and Year, take the row-wise lag of Year within each Id,
# then patch rows whose Year equals the previous row's Year by copying the
# previous row's PreviousYear.
data %>% arrange(Id, Year) %>%
group_by(Id) %>%
mutate(PreviousYear = lag(Year, 1)) %>%
mutate(PreviousYear = if_else(Year == lag(Year), lag(PreviousYear, 1), PreviousYear)) %>% # Fix issue created by repeated year
mutate(PreviousYear = if_else(Year == lag(Year), lag(PreviousYear, 1), PreviousYear)) # Same fix again: each pass repairs only one more repeated row
但这非常笨拙,因为显然我需要把 mutate 调用重复两次才能修复这两行……
提前致谢
我.
一种方法是仅保留 Id 和 Year 的唯一值,然后取 lag。然后,您可以将这个滞后的数据帧连接到原始数据帧以保持行数相同。
library(dplyr)

# Lag over the distinct (Id, Year) pairs instead of over rows, so every row
# of a repeated year receives the same previous year. Joining back onto
# `data` restores one output row per original row.
data %>%
  distinct(Id, Year) %>%
  arrange(Id, Year) %>% # lag() follows row order, so sort explicitly
  group_by(Id) %>%
  mutate(prev_year = lag(Year)) %>%
  ungroup() %>% # do not leak the grouping into later steps
  left_join(data, by = c("Year", "Id"))
# Year Id prev_year RepYear
#1 1994 A013 <NA> NA
#2 1995 A013 1994 NA
#3 1996 A013 1995 NA
#4 1997 A013 1996 NA
#5 1998 A013 1997 NA
#6 1999 A013 1998 NA
#7 2000 A013 1999 NA
#8 2001 A013 2000 NA
#9 2004 J633 <NA> NA
#10 2006 J633 2004 NA
#11 2007 J633 2006 NA
#12 2008 J633 2007 NA
#13 2009 J633 2008 NA
#14 2010 J633 2009 NA
#15 2011 J633 2010 NA
#16 2012 J633 2011 NA
#17 2013 J633 2012 NA
#18 2014 J633 2013 NA
#19 2017 J633 2014 2
#20 2017 J633 2014 3
#21 2017 J633 2014 1
#22 2018 J633 2017 NA
附加解决方案
library(dplyr)
library(tidyr) # provides unnest()

# Nest the rows of each (Id, Year) pair into a list-column (group_nest()
# names it `data` by default), lag Year across the resulting
# one-row-per-year table within each Id, then unnest to recover the
# original rows. The input is the question's `data` object.
data %>%
  group_nest(Id, Year) %>%
  group_by(Id) %>%
  mutate(py = lag(Year)) %>%
  unnest(data) %>%
  ungroup()
# A tibble: 22 x 4
Id Year RepYear py
<chr> <chr> <chr> <chr>
1 A013 1994 NA NA
2 A013 1995 NA 1994
3 A013 1996 NA 1995
4 A013 1997 NA 1996
5 A013 1998 NA 1997
6 A013 1999 NA 1998
7 A013 2000 NA 1999
8 A013 2001 NA 2000
9 J633 2004 NA NA
10 J633 2006 NA 2004
# ... with 12 more rows