根据一列值将 data.frame 一分为二
split data.frame in two based on one column values
假设我有一个如下所示的 data.frame `df`:
# Example data: seven rows, with `two` holding the grouping key
# (three "a" rows followed by four "b" rows).
df <- data.frame(
  one = c(1, 2, 3, 4, 5, 6, 7),
  two = rep(c("a", "b"), times = c(3, 4)),
  three = c(1123, 33, 5566, 212, 1, 90, 876)
)
我需要根据列 `two` 的值(即 `a` 和 `b`)将 `df` 一分为二。
这是我想要的输出:
one.x two.x three.x one.y two.y three.y
1 a 1123 4 b 212
2 a 33 5 b 1
3 a 5566 6 b 90
NA NA NA 7 b 876
谢谢
这是一个使用 `zoo::cbind.zoo` 的思路:
# Break df into one data frame per distinct value of `two`, then pass the
# pieces to zoo's cbind method as a single argument list.
do.call(
  what = zoo::cbind.zoo,
  args = split(x = df, f = df[["two"]])
)
# one.a two.a three.a one.b two.b three.b
#1 1 a 1123 4 b 212
#2 2 a 33 5 b 1
#3 3 a 5566 6 b 90
#4 <NA> <NA> <NA> 7 b 876
Base R 方案:
# Split df into one data frame per value of `two`.
lst <- split(df, ~two)

# Row count of the largest piece; every piece is padded up to this height.
# vapply() pins the result to one integer per piece, whereas sapply() chooses
# its return type from whatever the input happens to be.
nmax <- max(vapply(lst, nrow, integer(1)))

# Append all-NA rows to `x` until it has `nmax` rows.
pad_rows <- function(x) {
  k <- nrow(x)
  # seq_len() gives an empty index when `x` already has `nmax` rows, so a
  # full-height piece is left as it is.
  x[k + seq_len(nmax - k), ] <- NA
  x
}

# Bind the equal-height pieces side by side.
do.call(cbind, lapply(lst, pad_rows))
输出结果为:
a.one a.two a.three b.one b.two b.three
1 1 a 1123 4 b 212
2 2 a 33 5 b 1
3 3 a 5566 6 b 90
4 NA <NA> NA 7 b 876
一个 tidyverse 解决方案:
library(tidyverse)

# Example data; `two` is the column whose values define the pieces.
df <- data.frame(
  one = c(1, 2, 3, 4, 5, 6, 7),
  two = c("a", "a", "a", "b", "b", "b", "b"),
  three = c(1123, 33, 5566, 212, 1, 90, 876)
)

# Place the rows of each `two` group side by side, lining them up by their
# position within the group; shorter groups are filled with NA.
#
# `row_id` is a temporary helper column that is dropped at the end, so `df`
# must not already contain a column with that name.
wide <- df %>%
  group_by(two) %>%
  mutate(row_id = row_number()) %>%
  group_split() %>%
  # Suffix every column except the join key with the piece's position in the
  # list, so names stay unique no matter how many groups there are.
  imap(function(piece, idx) {
    rename_with(piece, function(nm) str_c(nm, idx), .cols = -row_id)
  }) %>%
  # A full join on the row index keeps every row of the longest piece and
  # leaves NA wherever a shorter piece has no row at that position.
  reduce(function(left, right) full_join(left, right, by = "row_id")) %>%
  # Rows that exist only in later pieces are appended by the join; restore
  # positional order before removing the helper column.
  arrange(row_id) %>%
  select(-row_id)

wide
#> # A tibble: 4 × 6
#>    one1 two1  three1  one2 two2  three2
#>   <dbl> <chr>  <dbl> <dbl> <chr>  <dbl>
#> 1     1 a       1123     4 b        212
#> 2     2 a         33     5 b          1
#> 3     3 a       5566     6 b         90
#> 4    NA <NA>      NA     7 b        876
假设我有一个如下所示的 data.frame `df`:
# Example data: seven rows, with `two` holding the grouping key
# (three "a" rows followed by four "b" rows).
df <- data.frame(
  one = c(1, 2, 3, 4, 5, 6, 7),
  two = rep(c("a", "b"), times = c(3, 4)),
  three = c(1123, 33, 5566, 212, 1, 90, 876)
)
我需要根据列 `two` 的值(即 `a` 和 `b`)将 `df` 一分为二。
这是我想要的输出:
one.x two.x three.x one.y two.y three.y
1 a 1123 4 b 212
2 a 33 5 b 1
3 a 5566 6 b 90
NA NA NA 7 b 876
谢谢
这是一个使用 `zoo::cbind.zoo` 的思路:
# Break df into one data frame per distinct value of `two`, then pass the
# pieces to zoo's cbind method as a single argument list.
do.call(
  what = zoo::cbind.zoo,
  args = split(x = df, f = df[["two"]])
)
# one.a two.a three.a one.b two.b three.b
#1 1 a 1123 4 b 212
#2 2 a 33 5 b 1
#3 3 a 5566 6 b 90
#4 <NA> <NA> <NA> 7 b 876
Base R 方案:
# Split df into one data frame per value of `two`.
lst <- split(df, ~two)

# Row count of the largest piece; every piece is padded up to this height.
# vapply() pins the result to one integer per piece, whereas sapply() chooses
# its return type from whatever the input happens to be.
nmax <- max(vapply(lst, nrow, integer(1)))

# Append all-NA rows to `x` until it has `nmax` rows.
pad_rows <- function(x) {
  k <- nrow(x)
  # seq_len() gives an empty index when `x` already has `nmax` rows, so a
  # full-height piece is left as it is.
  x[k + seq_len(nmax - k), ] <- NA
  x
}

# Bind the equal-height pieces side by side.
do.call(cbind, lapply(lst, pad_rows))
输出结果为:
a.one a.two a.three b.one b.two b.three
1 1 a 1123 4 b 212
2 2 a 33 5 b 1
3 3 a 5566 6 b 90
4 NA <NA> NA 7 b 876
一个 tidyverse 解决方案:
library(tidyverse)

# Example data; `two` is the column whose values define the pieces.
df <- data.frame(
  one = c(1, 2, 3, 4, 5, 6, 7),
  two = c("a", "a", "a", "b", "b", "b", "b"),
  three = c(1123, 33, 5566, 212, 1, 90, 876)
)

# Place the rows of each `two` group side by side, lining them up by their
# position within the group; shorter groups are filled with NA.
#
# `row_id` is a temporary helper column that is dropped at the end, so `df`
# must not already contain a column with that name.
wide <- df %>%
  group_by(two) %>%
  mutate(row_id = row_number()) %>%
  group_split() %>%
  # Suffix every column except the join key with the piece's position in the
  # list, so names stay unique no matter how many groups there are.
  imap(function(piece, idx) {
    rename_with(piece, function(nm) str_c(nm, idx), .cols = -row_id)
  }) %>%
  # A full join on the row index keeps every row of the longest piece and
  # leaves NA wherever a shorter piece has no row at that position.
  reduce(function(left, right) full_join(left, right, by = "row_id")) %>%
  # Rows that exist only in later pieces are appended by the join; restore
  # positional order before removing the helper column.
  arrange(row_id) %>%
  select(-row_id)

wide
#> # A tibble: 4 × 6
#>    one1 two1  three1  one2 two2  three2
#>   <dbl> <chr>  <dbl> <dbl> <chr>  <dbl>
#> 1     1 a       1123     4 b        212
#> 2     2 a         33     5 b          1
#> 3     3 a       5566     6 b         90
#> 4    NA <NA>      NA     7 b        876