将垂直数据转换为水平数据+提取第一个和最后一个日期
Converting vertical data to horizontal data + extracting first and last date
我是 R 的新手。我有一个数据库,其中 2 列看起来像下面的 table:
pt_id
Date
1222
20-01-2021
1222
18-11-2018
1222
17-02-2015
1222
21-04-2015
2555
18-01-2002
2555
03-04-2009
2555
25-12-2010
我想创建一个合并 pt_id
的新数据框,并创建 2 列,其中仅保存第一个日期和最后一个日期。我希望它看起来像
下面的 table
pt_id
Date_first
Date_last
1222
17-02-2015
20-01-2021
2555
18-01-2002
25-12-2010
上面的table只是一个小例子,我使用的数据库要大得多。
这些是我目前正在使用的软件包:
library(tidyverse)
library(haven)
library(tidyr)
library(dplyr)
library(date)
library(reshape2)
library(foreign)
library(data.table)
library(stringr)
library(plyr)
library(irr)
library(vcd)
library(vctrs)
我希望这是可能的,提前致谢。
你可以这样做:
mydf |>
mutate(Date = lubridate::dmy(Date)) |> # Only use if the variable is currently set to character
group_by(pt_id) |>
filter(Date == min(Date) | Date == max(Date)) |>
mutate(date_vars = if_else(Date == min(Date), "Date_first", "Date_last")) |>
ungroup() |>
pivot_wider(pt_id, values_from = Date, names_from = date_vars)
# A tibble: 2 x 3
pt_id Date_last Date_first
<dbl> <date> <date>
1 1222 2021-01-20 2015-02-17
2 2555 2010-12-25 2002-01-18
我也是 R 的新手,我想我会尝试一下,可能会启发某人纠正我 - 这只使用基础 R 并且有一个非常复杂的 for 循环:
df1$Date <- as.Date(df1$Date)
#Create a new df with a single entry for each patient
new_df <- unique(df1["pt_id"])
#make empty columns for the dates
new_df['date_first'] <- NA
new_df['date_last'] <- NA
#for each patient...
for (i in (1:nrow(new_df))){
#make an empty list to store the list of dates...
pt <- c()
#and for each row in the original dataframe...
for (j in (1:nrow(df1))){
#make another empty list to store the single date for each record
x = c()
# and if the patient ID is in the row being read...
if(df1[j,1]==new_df[i,1]){
#append that date to list x and move on through the original df
x<-append(x,df1[j,2])
}
#then append this list to the list pt...
pt <- append(pt,x)
}
#add the min and max values from the list pt to the new df for each entry in the new dataframe and move on to the next patient in the new dataframe
new_df[i,2] <- min(pt)
new_df[i,3] <- max(pt)
}
#make the columns dates again (as they were converted to numeric in the
max/min)
new_df$date_first <- as.Date(new_df$date_first)
new_df$date_last <- as.Date(new_df$date_last)
print(new_df)
我是 R 的新手。我有一个数据库,其中 2 列看起来像下面的 table:
pt_id | Date |
---|---|
1222 | 20-01-2021 |
1222 | 18-11-2018 |
1222 | 17-02-2015 |
1222 | 21-04-2015 |
2555 | 18-01-2002 |
2555 | 03-04-2009 |
2555 | 25-12-2010 |
我想创建一个合并 pt_id
的新数据框,并创建 2 列,其中仅保存第一个日期和最后一个日期。我希望它看起来像
pt_id | Date_first | Date_last |
---|---|---|
1222 | 17-02-2015 | 20-01-2021 |
2555 | 18-01-2002 | 25-12-2010 |
上面的table只是一个小例子,我使用的数据库要大得多。 这些是我目前正在使用的软件包:
library(tidyverse)
library(haven)
library(tidyr)
library(dplyr)
library(date)
library(reshape2)
library(foreign)
library(data.table)
library(stringr)
library(plyr)
library(irr)
library(vcd)
library(vctrs)
我希望这是可能的,提前致谢。
你可以这样做:
mydf |>
mutate(Date = lubridate::dmy(Date)) |> # Only use if the variable is currently set to character
group_by(pt_id) |>
filter(Date == min(Date) | Date == max(Date)) |>
mutate(date_vars = if_else(Date == min(Date), "Date_first", "Date_last")) |>
ungroup() |>
pivot_wider(pt_id, values_from = Date, names_from = date_vars)
# A tibble: 2 x 3
pt_id Date_last Date_first
<dbl> <date> <date>
1 1222 2021-01-20 2015-02-17
2 2555 2010-12-25 2002-01-18
我也是 R 的新手,我想我会尝试一下,可能会启发某人纠正我 - 这只使用基础 R 并且有一个非常复杂的 for 循环:
df1$Date <- as.Date(df1$Date)
#Create a new df with a single entry for each patient
new_df <- unique(df1["pt_id"])
#make empty columns for the dates
new_df['date_first'] <- NA
new_df['date_last'] <- NA
#for each patient...
for (i in (1:nrow(new_df))){
#make an empty list to store the list of dates...
pt <- c()
#and for each row in the original dataframe...
for (j in (1:nrow(df1))){
#make another empty list to store the single date for each record
x = c()
# and if the patient ID is in the row being read...
if(df1[j,1]==new_df[i,1]){
#append that date to list x and move on through the original df
x<-append(x,df1[j,2])
}
#then append this list to the list pt...
pt <- append(pt,x)
}
#add the min and max values from the list pt to the new df for each entry in the new dataframe and move on to the next patient in the new dataframe
new_df[i,2] <- min(pt)
new_df[i,3] <- max(pt)
}
#make the columns dates again (as they were converted to numeric in the
max/min)
new_df$date_first <- as.Date(new_df$date_first)
new_df$date_last <- as.Date(new_df$date_last)
print(new_df)