合并不同长度的长格式数据帧并转换为宽格式
Combine long-format data frames with different lengths and convert to wide format
由于 `time` 变量的缘故(不平衡面板数据),我想合并长度不同的长格式数据帧:
set.seed(63)
# Create a data frame with `size` consecutive time points (1..size) and a
# single 0/1 value of x, drawn once at random and repeated on every row.
#
# size: number of time points (rows) to generate; defaults to 5.
# Returns a data frame with the columns `time` and `x`.
func1 <- function(size = 5) {
  x <- sample(c(0, 1), 1)
  # seq_len()/rep() give zero rows for size = 0, whereas 1:size would count
  # down and produce the two rows 1, 0.
  data.frame(time = seq_len(size), x = rep(x, size))
}
# Build the balanced panel: for each id in 1..5, generate a func1() data
# frame with 5 time points, tag it with its id, and stack the pieces by row.
# Note: despite its name, func2 is the resulting data frame, not a function.
func2 <- do.call(
  rbind,
  lapply(1:5, function(unit_id) {
    data.frame(id = unit_id, func1(5))
  })
)
# Keep 10 randomly chosen rows of func2 (drawn without replacement) so that
# the panel becomes unbalanced.
dd <- func2[sample(nrow(func2), size = 10), ]
# Sort the sampled rows by id, then by time within each id.
fd <- dd[order(dd$id, dd$time), ]
> fd
id time x
1 1 1 0
2 1 2 0
3 1 3 0
4 1 4 0
5 1 5 0
10 2 5 1
13 3 3 0
17 4 2 0
18 4 3 0
21 5 1 0
最后,我想将其转换为宽格式,并根据 `time` 变量,用 NA 填充 `x` 中被跳过的单元格。像这样:
id x.time1 x.time2 x.time3 x.time4 x.time5
1 0 0 0 0 0
2 NA NA NA NA 1
3 NA NA 0 NA NA
4 NA 0 0 NA NA
5 0 NA NA NA NA
可能的解决方案:
library(tidyverse)
# Unbalanced panel in long format: one row per observed (id, time) pair.
fd <- data.frame(
  id = c(1L, 1L, 1L, 1L, 1L, 2L, 3L, 4L, 4L, 5L),
  time = c(1L, 2L, 3L, 4L, 5L, 5L, 3L, 2L, 3L, 1L),
  x = c(0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L)
)
# Spread x into one column per time point; (id, time) pairs that are absent
# from fd become NA.
# - id_cols is passed by name: passing it by position is deprecated in
#   recent tidyr versions.
# - names_sort = TRUE orders the new columns by time value instead of by
#   first appearance in the data.
fd %>%
  pivot_wider(
    id_cols = id,
    names_from = time,
    values_from = x,
    names_prefix = "x.time",
    names_sort = TRUE
  )
#> # A tibble: 5 × 6
#> id x.time1 x.time2 x.time3 x.time4 x.time5
#> <int> <int> <int> <int> <int> <int>
#> 1 1 0 0 0 0 0
#> 2 2 NA NA NA NA 1
#> 3 3 NA NA 0 NA NA
#> 4 4 NA 0 0 NA NA
#> 5 5 0 NA NA NA NA
使用 data.table:
library(data.table)
# Cast to wide format: one row per id, one column per observed time value;
# (id, time) pairs that are absent from fd become NA.
# Note: setDT() converts fd to a data.table by reference.
wide <- dcast(setDT(fd), id ~ time, value.var = "x")
# Add the "x.time" prefix after casting. Casting on `time` itself keeps the
# columns in numeric order, whereas casting on paste0("x.time", time) orders
# them as strings ("x.time10" would come before "x.time2").
time_cols <- setdiff(names(wide), "id")
setnames(wide, time_cols, paste0("x.time", time_cols))
wide
输出:
id x.time1 x.time2 x.time3 x.time4 x.time5
1: 1 0 0 0 0 0
2: 2 NA NA NA NA 1
3: 3 NA NA 0 NA NA
4: 4 NA 0 0 NA NA
5: 5 0 NA NA NA NA
由于 `time` 变量的缘故(不平衡面板数据),我想合并长度不同的长格式数据帧:
set.seed(63)
#function to create a data frame that includes id, time and x
# Returns a data frame with the columns `time` (1..size) and `x`, where x is
# a single 0/1 value drawn once at random and repeated on every row.
#
# size: number of time points (rows) to generate; defaults to 5.
func1 <- function(size = 5) {
  x <- sample(c(0, 1), 1)
  # seq_len()/rep() give zero rows for size = 0, whereas 1:size would count
  # down and produce the two rows 1, 0.
  data.frame(time = seq_len(size), x = rep(x, size))
}
# Build the balanced panel: for each id in 1..5, generate a func1() data
# frame with 5 time points, tag it with its id, and stack the pieces by row.
# Note: despite its name, func2 is the resulting data frame, not a function.
func2 <- do.call(
  rbind,
  lapply(1:5, function(unit_id) {
    data.frame(id = unit_id, func1(5))
  })
)
# Keep 10 randomly chosen rows of func2 (drawn without replacement) so that
# the panel becomes unbalanced.
dd <- func2[sample(nrow(func2), size = 10), ]
# Sort the sampled rows by id, then by time within each id.
fd <- dd[order(dd$id, dd$time), ]
> fd
id time x
1 1 1 0
2 1 2 0
3 1 3 0
4 1 4 0
5 1 5 0
10 2 5 1
13 3 3 0
17 4 2 0
18 4 3 0
21 5 1 0
最后,我想将其转换为宽格式,并根据 `time` 变量,用 NA 填充 `x` 中被跳过的单元格。像这样:
id x.time1 x.time2 x.time3 x.time4 x.time5
1 0 0 0 0 0
2 NA NA NA NA 1
3 NA NA 0 NA NA
4 NA 0 0 NA NA
5 0 NA NA NA NA
可能的解决方案:
library(tidyverse)
# Unbalanced panel in long format: one row per observed (id, time) pair.
fd <- data.frame(
  id = c(1L, 1L, 1L, 1L, 1L, 2L, 3L, 4L, 4L, 5L),
  time = c(1L, 2L, 3L, 4L, 5L, 5L, 3L, 2L, 3L, 1L),
  x = c(0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L)
)
# Spread x into one column per time point; (id, time) pairs that are absent
# from fd become NA.
# - id_cols is passed by name: passing it by position is deprecated in
#   recent tidyr versions.
# - names_sort = TRUE orders the new columns by time value instead of by
#   first appearance in the data.
fd %>%
  pivot_wider(
    id_cols = id,
    names_from = time,
    values_from = x,
    names_prefix = "x.time",
    names_sort = TRUE
  )
#> # A tibble: 5 × 6
#> id x.time1 x.time2 x.time3 x.time4 x.time5
#> <int> <int> <int> <int> <int> <int>
#> 1 1 0 0 0 0 0
#> 2 2 NA NA NA NA 1
#> 3 3 NA NA 0 NA NA
#> 4 4 NA 0 0 NA NA
#> 5 5 0 NA NA NA NA
使用 data.table:
library(data.table)
# Cast to wide format: one row per id, one column per observed time value;
# (id, time) pairs that are absent from fd become NA.
# Note: setDT() converts fd to a data.table by reference.
wide <- dcast(setDT(fd), id ~ time, value.var = "x")
# Add the "x.time" prefix after casting. Casting on `time` itself keeps the
# columns in numeric order, whereas casting on paste0("x.time", time) orders
# them as strings ("x.time10" would come before "x.time2").
time_cols <- setdiff(names(wide), "id")
setnames(wide, time_cols, paste0("x.time", time_cols))
wide
输出:
id x.time1 x.time2 x.time3 x.time4 x.time5
1: 1 0 0 0 0 0
2: 2 NA NA NA NA 1
3: 3 NA NA 0 NA NA
4: 4 NA 0 0 NA NA
5: 5 0 NA NA NA NA