R将列移动为其他列的行

R move columns to be rows of other columns

我正在把一个结构混乱的 Excel 工作表转换为可用的数据。它的基本结构是重复出现的变量块(每块占若干列),各块的行中包含相似的项目。我想把后面的列块移动到第一个列块的下方,使它们成为第一个列块的额外行。

这很难用文字描述,所以下面给出一个最小可复现示例(MWE)。

# Example input: 9 rows x 12 columns, i.e. three side-by-side groups of four
# columns. The groups differ only in the third entry of their second column.
d1 <- local({
  first_col <- c("p", "w", "s", NA, "A Ex", NA, "Pres", "Time", "HR")
  third_col <- c(NA, NA, NA, NA, NA, NA, "Done", "Time", "HR")
  empty_col <- rep(NA, 9)  # all-NA logical column
  # Second column of a group; only its third entry varies between groups.
  second_col <- function(third) {
    c("1", "1", third, NA, "Walk", NA, NA, NA, NA)
  }

  structure(
    list(
      ...1 = first_col,
      ...2 = second_col("1"),
      ...3 = third_col,
      ...4 = empty_col,
      ...5 = first_col,
      ...6 = second_col("2"),
      ...7 = third_col,
      ...8 = empty_col,
      ...9 = first_col,
      ...10 = second_col("3"),
      ...11 = third_col,
      ...12 = empty_col
    ),
    row.names = c(NA, -9L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})

我想移动第 5:8 列和第 9:12 列,使它们成为第 1:4 列的额外行,所以期望的结果是:

# Expected result: 27 rows x 4 columns, i.e. the three 9-row groups of the
# input stacked on top of each other under the first group's column names.
df2 <- local({
  first_col <- c("p", "w", "s", NA, "A Ex", NA, "Pres", "Time", "HR")
  third_col <- c(NA, NA, NA, NA, NA, NA, "Done", "Time", "HR")
  # Second column: one 9-entry run per group, differing in the third entry.
  second_col <- unlist(lapply(
    c("1", "2", "3"),
    function(third) c("1", "1", third, NA, "Walk", NA, NA, NA, NA)
  ))

  structure(
    list(
      ...1 = rep(first_col, times = 3),
      ...2 = second_col,
      ...3 = rep(third_col, times = 3),
      ...4 = rep(NA, 27)  # all-NA logical column
    ),
    row.names = c(NA, -27L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})

基础 R

基本前提是按 4 列为一组拆分:

# Integer-dividing the zero-based column positions by 4 gives every run of
# four adjacent columns the same group key (0, 1, 2, ...); split.default()
# then splits the data frame column-wise into one piece per key.
splitframes <- split.default(
  x = d1,
  f = (seq_along(d1) - 1) %/% 4
)
splitframes
# $`0`
# # A tibble: 9 x 4
#   ...1  ...2  ...3  ...4 
#   <chr> <chr> <chr> <lgl>
# 1 p     1     <NA>  NA   
# 2 w     1     <NA>  NA   
# 3 s     1     <NA>  NA   
# 4 <NA>  <NA>  <NA>  NA   
# 5 A Ex  Walk  <NA>  NA   
# 6 <NA>  <NA>  <NA>  NA   
# 7 Pres  <NA>  Done  NA   
# 8 Time  <NA>  Time  NA   
# 9 HR    <NA>  HR    NA   
# $`1`
# # A tibble: 9 x 4
#   ...1  ...2  ...3  ...4 
#   <chr> <chr> <chr> <lgl>
# 1 p     1     <NA>  NA   
# 2 w     1     <NA>  NA

不幸的是,rbind.data.frame 要求所有参数具有相同的列名,而这里各组的列名并不相同:

lapply(splitframes, names)
# $`0`
# [1] "...1" "...2" "...3" "...4"
# $`1`
# [1] "...5" "...6" "...7" "...8"
# $`2`
# [1] "...9"  "...10" "...11" "...12"

所以我们需要先解决这个问题:

# Give every piece the column names of the first piece so that rbind() can
# line the columns up, then stack all pieces row-wise.
splitframes <- lapply(splitframes, function(piece) {
  colnames(piece) <- names(splitframes[[1]])
  piece
})
do.call(rbind, splitframes)
# # A tibble: 27 x 4
#    ...1  ...2  ...3  ...4 
#  * <chr> <chr> <chr> <lgl>
#  1 p     1     <NA>  NA   
#  2 w     1     <NA>  NA   
#  3 s     1     <NA>  NA   
#  4 <NA>  <NA>  <NA>  NA   
#  5 A Ex  Walk  <NA>  NA   
#  6 <NA>  <NA>  <NA>  NA   
#  7 Pres  <NA>  Done  NA   
#  8 Time  <NA>  Time  NA   
#  9 HR    <NA>  HR    NA   
# 10 p     1     <NA>  NA   
# # ... with 17 more rows

dplyr

这个包用起来更省事一些:

# Start again from the raw 4-column pieces (names left untouched) and let
# bind_rows() stack them; see the printed output below for how it treats
# the differing column names.
splitframes <- split.default(
  x = d1,
  f = (seq_along(d1) - 1) %/% 4
)
dplyr::bind_rows(splitframes)
# New names:
# * ...5 -> ...1
# * ...6 -> ...2
# * ...7 -> ...3
# * ...8 -> ...4
# New names:
# * ...9 -> ...1
# * ...10 -> ...2
# * ...11 -> ...3
# * ...12 -> ...4
# # A tibble: 27 x 4
#    ...1  ...2  ...3  ...4 
#    <chr> <chr> <chr> <lgl>
#  1 p     1     <NA>  NA   
#  2 w     1     <NA>  NA   
#  3 s     1     <NA>  NA   
#  4 <NA>  <NA>  <NA>  NA   
#  5 A Ex  Walk  <NA>  NA   
#  6 <NA>  <NA>  <NA>  NA   
#  7 Pres  <NA>  Done  NA   
#  8 Time  <NA>  Time  NA   
#  9 HR    <NA>  HR    NA   
# 10 p     1     <NA>  NA   
# # ... with 17 more rows

data.table

library(data.table)

# Start again from the raw 4-column pieces (names left untouched) and stack
# them with rbindlist(), keeping its default handling of column names.
splitframes <- split.default(
  x = d1,
  f = (seq_along(d1) - 1) %/% 4
)
rbindlist(splitframes)
# Column 1 ['...5'] of item 2 is missing in item 1. Use fill=TRUE to fill with NA (NULL for list columns), or use.names=FALSE to ignore column names. use.names='check' (default from v1.12.2) emits this message and proceeds as if use.names=FALSE for  backwards compatibility. See news item 5 in v1.12.2 for options to control this message.
#       ...1   ...2   ...3   ...4
#     <char> <char> <char> <lgcl>
#  1:      p      1   <NA>     NA
#  2:      w      1   <NA>     NA
#  3:      s      1   <NA>     NA
#  4:   <NA>   <NA>   <NA>     NA

要消除这条消息,请改用 rbindlist(..., use.names = FALSE)。