按因子分组提取最早日期对应的行
Subsetting the lowest date by a factor
我有以下数据集:
# Example data: three ids, each with five dd/mm/yyyy date strings and values.
id <- rep(c("1a", "2a", "3a"), each = 5)
fch <- c(
  "22/05/2020", "12/01/2020", "01/01/2019", "10/11/2020", "01/01/2019",
  "10/10/2015", "01/01/2015", "20/10/2015", "08/04/2020", "12/12/2019",
  "01/05/2020", "01/01/2013", "10/08/2019", "12/01/2020", "20/10/2019"
)
dat <- c(
  25, 35, 48, 97, 112,
  65, 85, 77, 89, 555,
  58, 98, 25, 45, 336
)
# NOTE: cbind() builds a single-type (character) matrix first, so every
# column of `data`, including `dat`, is stored as text rather than numbers.
data <- as.data.frame(cbind(id, fch, dat))
我的目的是按因子“id”分组,提取每组中最早日期对应的行。
因此我期望得到的数据框如下所示:
# Expected result: one row per id, holding that id's earliest date.
# NOTE: this reassigns `id`, `fch` and `dat`, replacing any vectors of the
# same names defined earlier in the session.
id <- c("1a", "2a", "3a")
fch <- c("01/01/2019", "01/01/2015", "01/01/2013")
dat <- c(48, 85, 98)
# cbind() goes through a character matrix, so all columns end up as text.
data_result <- as.data.frame(cbind(id, fch, dat))
这是我失败的尝试:
# Failed attempt, kept as posted: parse fch (dd/mm/yyyy text) into Date, then
# overwrite fch with the minimum date of each id.
# mutate() never drops rows, so DF1 still has every input row; only the fch
# column is replaced, and the other columns are not matched to that date.
DF1 <- data %>%
mutate(fch = as.Date(as.character(data$fch),format="%d/%m/%Y")) %>%
group_by(id) %>%
mutate(fch = min(fch)) %>%
ungroup
我们先用 arrange 将数据按 'id' 和转换为 Date 的 'fch' 排序,再按 'id' 分组,然后用 slice_head 取每组的第一行
library(dplyr)
library(lubridate)
# Sort by id and by fch parsed as day/month/year, then keep the first row of
# each id, i.e. the row with the earliest date. fch itself is left as text;
# dmy() is only used as the sort key.
data %>%
  arrange(id, dmy(fch)) %>%
  group_by(id) %>%
  slice_head(n = 1) %>%
  ungroup()
-输出
# A tibble: 3 x 3
# id fch dat
# <chr> <chr> <dbl>
#1 1a 01/01/2019 48
#2 2a 01/01/2015 85
#3 3a 01/01/2013 98
注意:cbind 默认返回 matrix,而 matrix 只能有一种数据类型。相反,我们可以用 data.frame 直接创建数据
# Build the data frame directly from the vectors so each column keeps its own
# type (no intermediate matrix).
data <- data.frame(id = id, fch = fch, dat = dat)
与 @akrun 的方法略有不同。请注意,数据中有一个最早日期对应两个条目。如果没有具体的时间信息,就无法知道哪一个先发生(或者你可能两个都想要?)。
library(tidyverse)
library(lubridate)
# Keep every row whose date equals the earliest date of its id. Ties are all
# returned, so id "1a" (two rows dated 01/01/2019) yields two rows.
data.frame(
  id = c(rep("1a", 5), rep("2a", 5), rep("3a", 5)),
  fch = c(
    "22/05/2020", "12/01/2020", "01/01/2019", "10/11/2020", "01/01/2019",
    "10/10/2015", "01/01/2015", "20/10/2015", "08/04/2020", "12/12/2019",
    "01/05/2020", "01/01/2013", "10/08/2019", "12/01/2020", "20/10/2019"
  ),
  dat = c(25, 35, 48, 97, 112, 65, 85, 77, 89, 555, 58, 98, 25, 45, 336)
) %>%
  group_by(id) %>%
  # Parse the dd/mm/yyyy text into Date so min() compares chronologically.
  mutate(fch = dmy(fch)) %>%
  # The trailing pipe is required: without it ungroup() is called with no
  # data and the filtered result stays grouped.
  filter(fch == min(fch)) %>%
  ungroup()
# A tibble: 4 x 3
#   id    fch          dat
#   <chr> <date>     <dbl>
# 1 1a    2019-01-01    48
# 2 1a    2019-01-01   112
# 3 2a    2015-01-01    85
# 4 3a    2013-01-01    98
我有以下数据集:
# Example data: three ids, each with five dd/mm/yyyy date strings and values.
id <- rep(c("1a", "2a", "3a"), each = 5)
fch <- c(
  "22/05/2020", "12/01/2020", "01/01/2019", "10/11/2020", "01/01/2019",
  "10/10/2015", "01/01/2015", "20/10/2015", "08/04/2020", "12/12/2019",
  "01/05/2020", "01/01/2013", "10/08/2019", "12/01/2020", "20/10/2019"
)
dat <- c(
  25, 35, 48, 97, 112,
  65, 85, 77, 89, 555,
  58, 98, 25, 45, 336
)
# NOTE: cbind() builds a single-type (character) matrix first, so every
# column of `data`, including `dat`, is stored as text rather than numbers.
data <- as.data.frame(cbind(id, fch, dat))
我的目的是按因子“id”分组,提取每组中最早日期对应的行。
因此我期望得到的数据框如下所示:
# Expected result: one row per id, holding that id's earliest date.
# NOTE: this reassigns `id`, `fch` and `dat`, replacing any vectors of the
# same names defined earlier in the session.
id <- c("1a", "2a", "3a")
fch <- c("01/01/2019", "01/01/2015", "01/01/2013")
dat <- c(48, 85, 98)
# cbind() goes through a character matrix, so all columns end up as text.
data_result <- as.data.frame(cbind(id, fch, dat))
这是我失败的尝试:
# Failed attempt, kept as posted: parse fch (dd/mm/yyyy text) into Date, then
# overwrite fch with the minimum date of each id.
# mutate() never drops rows, so DF1 still has every input row; only the fch
# column is replaced, and the other columns are not matched to that date.
DF1 <- data %>%
mutate(fch = as.Date(as.character(data$fch),format="%d/%m/%Y")) %>%
group_by(id) %>%
mutate(fch = min(fch)) %>%
ungroup
我们先用 arrange 将数据按 'id' 和转换为 Date 的 'fch' 排序,再按 'id' 分组,然后用 slice_head 取每组的第一行
library(dplyr)
library(lubridate)
# Sort by id and by fch parsed as day/month/year, then keep the first row of
# each id, i.e. the row with the earliest date. fch itself is left as text;
# dmy() is only used as the sort key.
data %>%
  arrange(id, dmy(fch)) %>%
  group_by(id) %>%
  slice_head(n = 1) %>%
  ungroup()
-输出
# A tibble: 3 x 3
# id fch dat
# <chr> <chr> <dbl>
#1 1a 01/01/2019 48
#2 2a 01/01/2015 85
#3 3a 01/01/2013 98
注意:cbind 默认返回 matrix,而 matrix 只能有一种数据类型。相反,我们可以用 data.frame 直接创建数据
# Build the data frame directly from the vectors so each column keeps its own
# type (no intermediate matrix).
data <- data.frame(id = id, fch = fch, dat = dat)
与 @akrun 的方法略有不同。请注意,数据中有一个最早日期对应两个条目。如果没有具体的时间信息,就无法知道哪一个先发生(或者你可能两个都想要?)。
library(tidyverse)
library(lubridate)
# Keep every row whose date equals the earliest date of its id. Ties are all
# returned, so id "1a" (two rows dated 01/01/2019) yields two rows.
data.frame(
  id = c(rep("1a", 5), rep("2a", 5), rep("3a", 5)),
  fch = c(
    "22/05/2020", "12/01/2020", "01/01/2019", "10/11/2020", "01/01/2019",
    "10/10/2015", "01/01/2015", "20/10/2015", "08/04/2020", "12/12/2019",
    "01/05/2020", "01/01/2013", "10/08/2019", "12/01/2020", "20/10/2019"
  ),
  dat = c(25, 35, 48, 97, 112, 65, 85, 77, 89, 555, 58, 98, 25, 45, 336)
) %>%
  group_by(id) %>%
  # Parse the dd/mm/yyyy text into Date so min() compares chronologically.
  mutate(fch = dmy(fch)) %>%
  # The trailing pipe is required: without it ungroup() is called with no
  # data and the filtered result stays grouped.
  filter(fch == min(fch)) %>%
  ungroup()
# A tibble: 4 x 3
#   id    fch          dat
#   <chr> <date>     <dbl>
# 1 1a    2019-01-01    48
# 2 1a    2019-01-01   112
# 3 2a    2015-01-01    85
# 4 3a    2013-01-01    98