对每个 id 重复第一次观察
repeat first observation for each id
我有一个数据集如下
Obs# Id Date Med Dosage Result
1 1567 01/03/2011 Acebutol 10mg 100.2
2 1567 04/02/2011 Acebutol 10mg 98.6
3 1567 08/14/2011 Oxaprozin 20mg 99.34
4 1567 08/14/2011 Bayer 20mg 99.34
5 7845 02/01/2011 Bayer 20mg 89.64
6 7845 06/14/2011 Bayer 20mg 95.41
7 7845 01/06/2012 Bayer 50mg 89.92
8 7845 01/06/2012 Acebutol 50mg 89.92
9 7845 04/19/2012 Bayer 50mg 95.15
10 7845 09/25/2012 Bayer 50mg 99.37
11 1567 01/14/2012 Oxaprozin 20mg 89.34
12 1567 05/12/2012 Oxaprozin 20mg 91.4
# Sample data in long format: one row per observation (Obs.) with the Id, the
# Date, the medication (Med), its Dosage and the measured Result.
# Date, Med and Dosage are factors whose levels are listed explicitly.
# Note that the Dosage levels " 20mg" and " 50mg" start with a space and are
# therefore distinct from "20mg" and "50mg".
Test2 <- data.frame(
  Obs. = 1:12,
  Id = rep(c(1567L, 7845L, 1567L), times = c(4L, 6L, 2L)),
  Date = factor(
    c(
      "01/03/2011", "04/02/2011", "08/14/2011", "08/14/2011",
      "02/01/2011", "06/14/2011", "01/06/2012", "01/06/2012",
      "04/19/2012", "09/25/2012", "01/14/2012", "05/12/2012"
    ),
    levels = c(
      "01/03/2011", "01/06/2012", "01/14/2012", "02/01/2011", "04/02/2011",
      "04/19/2012", "05/12/2012", "06/14/2011", "08/14/2011", "09/25/2012"
    )
  ),
  Med = factor(
    c(
      "Acebutol", "Acebutol", "Oxaprozin", "Bayer", "Bayer", "Bayer",
      "Bayer", "Acebutol", "Bayer", "Bayer", "Oxaprozin", "Oxaprozin"
    ),
    levels = c("Acebutol", "Bayer", "Oxaprozin")
  ),
  Dosage = factor(
    c(
      "10mg", "10mg", "20mg", " 20mg", " 20mg", " 20mg",
      " 50mg", "50mg", " 50mg", " 50mg", "20mg", "20mg"
    ),
    levels = c(" 20mg", " 50mg", "10mg", "20mg", "50mg")
  ),
  Result = c(
    100.2, 98.6, 99.34, 99.34, 89.64, 95.41,
    89.92, 89.92, 95.15, 99.37, 89.34, 91.4
  )
)
我正在使用 tidyr 的 spread 函数按如下方式转换此数据集
library(dplyr)
library(tidyr)
library(doBy)
# Drop bookkeeping columns so they do not take part in the reshape. `X` is not
# present in the sample data shown here, so that assignment is a no-op for it.
Test2$X <- NULL
Test2$Obs. <- NULL

# Reshape to one row per Id/Date/Result with one dosage column per medication.
# Dosage is converted from factor to trimmed character first:
#   * 0 is not a level of the Dosage factor, so it cannot be stored as a fill
#     value in a factor column; a character "0" can.
#   * the raw levels " 20mg" / " 50mg" carry a stray leading space.
Test21 <- Test2 %>%
  mutate(Dosage = trimws(as.character(Dosage))) %>%
  spread(Med, Dosage, fill = "0")

# Rename Date to Date2 and parse it (month/day/year) into a Date, add an empty
# Date1 placeholder column, then order the rows by Id and date.
Test22 <- Test21 %>%
  rename(Date2 = Date) %>%
  mutate(
    Date2 = as.Date(as.character(Date2), "%m/%d/%Y"),
    Date1 = NA
  ) %>%
  arrange(Id, Date2)
Id Date2 Result Acebutol Bayer Oxaprozin Date1
1567 2011-01-03 100.20 10mg 0 0 NA
1567 2011-04-02 98.60 10mg 0 0 NA
1567 2011-08-14 99.34 0 20mg 20mg NA
1567 2012-01-14 89.34 0 0 20mg NA
1567 2012-05-12 91.40 0 0 20mg NA
7845 2011-02-01 89.64 0 20mg 0 NA
7845 2011-06-14 95.41 0 20mg 0 NA
7845 2012-01-06 89.92 50mg 50mg 0 NA
7845 2012-04-19 95.15 0 50mg 0 NA
7845 2012-09-25 99.37 0 50mg 0 NA
我想做的是对每个 Id 重复第一条观测记录,并把重复出来的那一行(下面以 ** 标记)中各药物列的值设为 0:
Id Date2 Result Acebutol Bayer Oxaprozin Date1
**1567 2011-01-03 100.20 0 0 0 NA
1567 2011-01-03 100.20 10mg 0 0 NA
1567 2011-04-02 98.60 10mg 0 0 NA
1567 2011-08-14 99.34 0 20mg 20mg NA
1567 2012-01-14 89.34 0 0 20mg NA
1567 2012-05-12 91.40 0 0 20mg NA
**7845 2011-02-01 89.64 0 0 0 NA
7845 2011-02-01 89.64 0 20mg 0 NA
7845 2011-06-14 95.41 0 20mg 0 NA
7845 2012-01-06 89.92 50mg 50mg 0 NA
7845 2012-04-19 95.15 0 50mg 0 NA
7845 2012-09-25 99.37 0 50mg 0 NA
我不确定该如何实现,如能提供任何帮助,不胜感激。
是这样的吗?
# Duplicate the earliest observation of every Id and put the copy back into
# the wide table. The copy is an exact duplicate of the first row; the
# medication columns are not altered here.
TestNew <- Test22 %>%
  group_by(Id) %>%
  # row_number(Date2) ranks by Date2 within each Id, so no prior sort is needed
  filter(row_number(Date2) == 1) %>%
  # drop the grouping so it does not leak into the combined result
  ungroup() %>%
  # append to the wide table (Test22); the long-format Test2 has different
  # columns (Date/Med/Dosage) and would not line up
  bind_rows(Test22) %>%
  arrange(Id, Date2)
# For every Id, prepend a copy of its first row in which the three medication
# columns are set to "0", then restore the Id/date ordering. The result is
# printed, not assigned.
Test22 %>%
  group_by(Id) %>%
  slice(1) %>%                      # first row of each Id
  ungroup() %>%                     # drop grouping before editing and binding
  # Overwrite the medication columns by name instead of by position (4:6) and
  # without the deprecated mutate_each()/funs() helpers.
  mutate(Acebutol = "0", Bayer = "0", Oxaprozin = "0") %>%
  bind_rows(., Test22) %>%          # zeroed rows first, then the original data
  # The zeroed copy ties with its source row on all three sort keys; it stays
  # on top only because it was bound first (assumes arrange() keeps tie order).
  arrange(Id, Date2, Result)
Source: local data frame [12 x 7]
Id Date2 Result Acebutol Bayer Oxaprozin Date1
1 1567 2011-01-03 100.20 0 0 0 NA
2 1567 2011-01-03 100.20 10mg NA NA NA
3 1567 2011-04-02 98.60 10mg NA NA NA
4 1567 2011-08-14 99.34 NA 20mg 20mg NA
5 1567 2012-01-14 89.34 NA NA 20mg NA
6 1567 2012-05-12 91.40 NA NA 20mg NA
7 7845 2011-02-01 89.64 0 0 0 NA
8 7845 2011-02-01 89.64 NA 20mg NA NA
9 7845 2011-06-14 95.41 NA 20mg NA NA
10 7845 2012-01-06 89.92 50mg 50mg NA NA
11 7845 2012-04-19 95.15 NA 50mg NA NA
12 7845 2012-09-25 99.37 NA 50mg NA NA
我有一个数据集如下
Obs# Id Date Med Dosage Result
1 1567 01/03/2011 Acebutol 10mg 100.2
2 1567 04/02/2011 Acebutol 10mg 98.6
3 1567 08/14/2011 Oxaprozin 20mg 99.34
4 1567 08/14/2011 Bayer 20mg 99.34
5 7845 02/01/2011 Bayer 20mg 89.64
6 7845 06/14/2011 Bayer 20mg 95.41
7 7845 01/06/2012 Bayer 50mg 89.92
8 7845 01/06/2012 Acebutol 50mg 89.92
9 7845 04/19/2012 Bayer 50mg 95.15
10 7845 09/25/2012 Bayer 50mg 99.37
11 1567 01/14/2012 Oxaprozin 20mg 89.34
12 1567 05/12/2012 Oxaprozin 20mg 91.4
# Sample data in long format: one row per observation (Obs.) with the Id, the
# Date, the medication (Med), its Dosage and the measured Result.
# Date, Med and Dosage are factors whose levels are listed explicitly.
# Note that the Dosage levels " 20mg" and " 50mg" start with a space and are
# therefore distinct from "20mg" and "50mg".
Test2 <- data.frame(
  Obs. = 1:12,
  Id = rep(c(1567L, 7845L, 1567L), times = c(4L, 6L, 2L)),
  Date = factor(
    c(
      "01/03/2011", "04/02/2011", "08/14/2011", "08/14/2011",
      "02/01/2011", "06/14/2011", "01/06/2012", "01/06/2012",
      "04/19/2012", "09/25/2012", "01/14/2012", "05/12/2012"
    ),
    levels = c(
      "01/03/2011", "01/06/2012", "01/14/2012", "02/01/2011", "04/02/2011",
      "04/19/2012", "05/12/2012", "06/14/2011", "08/14/2011", "09/25/2012"
    )
  ),
  Med = factor(
    c(
      "Acebutol", "Acebutol", "Oxaprozin", "Bayer", "Bayer", "Bayer",
      "Bayer", "Acebutol", "Bayer", "Bayer", "Oxaprozin", "Oxaprozin"
    ),
    levels = c("Acebutol", "Bayer", "Oxaprozin")
  ),
  Dosage = factor(
    c(
      "10mg", "10mg", "20mg", " 20mg", " 20mg", " 20mg",
      " 50mg", "50mg", " 50mg", " 50mg", "20mg", "20mg"
    ),
    levels = c(" 20mg", " 50mg", "10mg", "20mg", "50mg")
  ),
  Result = c(
    100.2, 98.6, 99.34, 99.34, 89.64, 95.41,
    89.92, 89.92, 95.15, 99.37, 89.34, 91.4
  )
)
我正在使用 tidyr 的 spread 函数按如下方式转换此数据集
library(dplyr)
library(tidyr)
library(doBy)
# Drop bookkeeping columns so they do not take part in the reshape. `X` is not
# present in the sample data shown here, so that assignment is a no-op for it.
Test2$X <- NULL
Test2$Obs. <- NULL

# Reshape to one row per Id/Date/Result with one dosage column per medication.
# Dosage is converted from factor to trimmed character first:
#   * 0 is not a level of the Dosage factor, so it cannot be stored as a fill
#     value in a factor column; a character "0" can.
#   * the raw levels " 20mg" / " 50mg" carry a stray leading space.
Test21 <- Test2 %>%
  mutate(Dosage = trimws(as.character(Dosage))) %>%
  spread(Med, Dosage, fill = "0")

# Rename Date to Date2 and parse it (month/day/year) into a Date, add an empty
# Date1 placeholder column, then order the rows by Id and date.
Test22 <- Test21 %>%
  rename(Date2 = Date) %>%
  mutate(
    Date2 = as.Date(as.character(Date2), "%m/%d/%Y"),
    Date1 = NA
  ) %>%
  arrange(Id, Date2)
Id Date2 Result Acebutol Bayer Oxaprozin Date1
1567 2011-01-03 100.20 10mg 0 0 NA
1567 2011-04-02 98.60 10mg 0 0 NA
1567 2011-08-14 99.34 0 20mg 20mg NA
1567 2012-01-14 89.34 0 0 20mg NA
1567 2012-05-12 91.40 0 0 20mg NA
7845 2011-02-01 89.64 0 20mg 0 NA
7845 2011-06-14 95.41 0 20mg 0 NA
7845 2012-01-06 89.92 50mg 50mg 0 NA
7845 2012-04-19 95.15 0 50mg 0 NA
7845 2012-09-25 99.37 0 50mg 0 NA
我想做的是对每个 Id 重复第一条观测记录,并把重复出来的那一行(下面以 ** 标记)中各药物列的值设为 0:
Id Date2 Result Acebutol Bayer Oxaprozin Date1
**1567 2011-01-03 100.20 0 0 0 NA
1567 2011-01-03 100.20 10mg 0 0 NA
1567 2011-04-02 98.60 10mg 0 0 NA
1567 2011-08-14 99.34 0 20mg 20mg NA
1567 2012-01-14 89.34 0 0 20mg NA
1567 2012-05-12 91.40 0 0 20mg NA
**7845 2011-02-01 89.64 0 0 0 NA
7845 2011-02-01 89.64 0 20mg 0 NA
7845 2011-06-14 95.41 0 20mg 0 NA
7845 2012-01-06 89.92 50mg 50mg 0 NA
7845 2012-04-19 95.15 0 50mg 0 NA
7845 2012-09-25 99.37 0 50mg 0 NA
我不确定该如何实现,如能提供任何帮助,不胜感激。
是这样的吗?
# Duplicate the earliest observation of every Id and put the copy back into
# the wide table. The copy is an exact duplicate of the first row; the
# medication columns are not altered here.
TestNew <- Test22 %>%
  group_by(Id) %>%
  # row_number(Date2) ranks by Date2 within each Id, so no prior sort is needed
  filter(row_number(Date2) == 1) %>%
  # drop the grouping so it does not leak into the combined result
  ungroup() %>%
  # append to the wide table (Test22); the long-format Test2 has different
  # columns (Date/Med/Dosage) and would not line up
  bind_rows(Test22) %>%
  arrange(Id, Date2)
# For every Id, prepend a copy of its first row in which the three medication
# columns are set to "0", then restore the Id/date ordering. The result is
# printed, not assigned.
Test22 %>%
  group_by(Id) %>%
  slice(1) %>%                      # first row of each Id
  ungroup() %>%                     # drop grouping before editing and binding
  # Overwrite the medication columns by name instead of by position (4:6) and
  # without the deprecated mutate_each()/funs() helpers.
  mutate(Acebutol = "0", Bayer = "0", Oxaprozin = "0") %>%
  bind_rows(., Test22) %>%          # zeroed rows first, then the original data
  # The zeroed copy ties with its source row on all three sort keys; it stays
  # on top only because it was bound first (assumes arrange() keeps tie order).
  arrange(Id, Date2, Result)
Source: local data frame [12 x 7]
Id Date2 Result Acebutol Bayer Oxaprozin Date1
1 1567 2011-01-03 100.20 0 0 0 NA
2 1567 2011-01-03 100.20 10mg NA NA NA
3 1567 2011-04-02 98.60 10mg NA NA NA
4 1567 2011-08-14 99.34 NA 20mg 20mg NA
5 1567 2012-01-14 89.34 NA NA 20mg NA
6 1567 2012-05-12 91.40 NA NA 20mg NA
7 7845 2011-02-01 89.64 0 0 0 NA
8 7845 2011-02-01 89.64 NA 20mg NA NA
9 7845 2011-06-14 95.41 NA 20mg NA NA
10 7845 2012-01-06 89.92 50mg 50mg NA NA
11 7845 2012-04-19 95.15 NA 50mg NA NA
12 7845 2012-09-25 99.37 NA 50mg NA NA