R将列移动为其他列的行
R move columns to be rows of other columns
我正在将混乱的 excel sheet 转换为可用数据。基本结构是重复的变量块,行中有相似的项目。我想移动列,使它们成为第一个列块的行。
这很难用文字描述,所以这里给出一个最小可复现示例(MWE)。
d1 = structure(list(...1 = c("p", "w", "s", NA, "A Ex", NA, "Pres",
"Time", "HR"), ...2 = c("1", "1", "1", NA, "Walk", NA, NA, NA,
NA), ...3 = c(NA, NA, NA, NA, NA, NA, "Done", "Time", "HR"),
...4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA), ...5 = c("p",
"w", "s", NA, "A Ex", NA, "Pres", "Time", "HR"), ...6 = c("1",
"1", "2", NA, "Walk", NA, NA, NA, NA), ...7 = c(NA, NA, NA,
NA, NA, NA, "Done", "Time", "HR"), ...8 = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA), ...9 = c("p", "w", "s", NA, "A Ex",
NA, "Pres", "Time", "HR"), ...10 = c("1", "1", "3", NA, "Walk",
NA, NA, NA, NA), ...11 = c(NA, NA, NA, NA, NA, NA, "Done",
"Time", "HR"), ...12 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA)), row.names = c(NA, -9L), class = c("tbl_df", "tbl",
"data.frame"))
我想移动列 5:8 和 9:12,使它们成为列 1:4 的额外行,所以期望的结果是:
df2 = structure(list(...1 = c("p", "w", "s", NA, "A Ex", NA, "Pres",
"Time", "HR", "p", "w", "s", NA, "A Ex", NA, "Pres", "Time",
"HR", "p", "w", "s", NA, "A Ex", NA, "Pres", "Time", "HR"), ...2 = c("1",
"1", "1", NA, "Walk", NA, NA, NA, NA, "1", "1", "2", NA, "Walk",
NA, NA, NA, NA, "1", "1", "3", NA, "Walk", NA, NA, NA, NA), ...3 = c(NA,
NA, NA, NA, NA, NA, "Done", "Time", "HR", NA, NA, NA, NA, NA,
NA, "Done", "Time", "HR", NA, NA, NA, NA, NA, NA, "Done", "Time",
"HR"), ...4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA,
-27L), class = c("tbl_df", "tbl", "data.frame"))
基础 R
基本前提是按 4 列为一组拆分:
splitframes <- split.default(d1, (seq_along(d1)-1) %/% 4)
splitframes
# $`0`
# # A tibble: 9 x 4
# ...1 ...2 ...3 ...4
# <chr> <chr> <chr> <lgl>
# 1 p 1 <NA> NA
# 2 w 1 <NA> NA
# 3 s 1 <NA> NA
# 4 <NA> <NA> <NA> NA
# 5 A Ex Walk <NA> NA
# 6 <NA> <NA> <NA> NA
# 7 Pres <NA> Done NA
# 8 Time <NA> Time NA
# 9 HR <NA> HR NA
# $`1`
# # A tibble: 9 x 4
# ...1 ...2 ...3 ...4
# <chr> <chr> <chr> <lgl>
# 1 p 1 <NA> NA
# 2 w 1 <NA> NA
不幸的是,rbind.data.frame 要求所有参数具有相同的列名,而这里各组的列名并不相同:
lapply(splitframes, names)
# $`0`
# [1] "...1" "...2" "...3" "...4"
# $`1`
# [1] "...5" "...6" "...7" "...8"
# $`2`
# [1] "...9" "...10" "...11" "...12"
所以我们需要先解决这个问题:
splitframes <- lapply(splitframes, `colnames<-`, names(splitframes[[1]]))
do.call(rbind, splitframes)
# # A tibble: 27 x 4
# ...1 ...2 ...3 ...4
# * <chr> <chr> <chr> <lgl>
# 1 p 1 <NA> NA
# 2 w 1 <NA> NA
# 3 s 1 <NA> NA
# 4 <NA> <NA> <NA> NA
# 5 A Ex Walk <NA> NA
# 6 <NA> <NA> <NA> NA
# 7 Pres <NA> Done NA
# 8 Time <NA> Time NA
# 9 HR <NA> HR NA
# 10 p 1 <NA> NA
# # ... with 17 more rows
dplyr
这个包更友好一些:
splitframes <- split.default(d1, (seq_along(d1)-1) %/% 4)
dplyr::bind_rows(splitframes)
# New names:
# * ...5 -> ...1
# * ...6 -> ...2
# * ...7 -> ...3
# * ...8 -> ...4
# New names:
# * ...9 -> ...1
# * ...10 -> ...2
# * ...11 -> ...3
# * ...12 -> ...4
# # A tibble: 27 x 4
# ...1 ...2 ...3 ...4
# <chr> <chr> <chr> <lgl>
# 1 p 1 <NA> NA
# 2 w 1 <NA> NA
# 3 s 1 <NA> NA
# 4 <NA> <NA> <NA> NA
# 5 A Ex Walk <NA> NA
# 6 <NA> <NA> <NA> NA
# 7 Pres <NA> Done NA
# 8 Time <NA> Time NA
# 9 HR <NA> HR NA
# 10 p 1 <NA> NA
# # ... with 17 more rows
data.table
splitframes <- split.default(d1, (seq_along(d1)-1) %/% 4)
library(data.table)
rbindlist(splitframes)
# Column 1 ['...5'] of item 2 is missing in item 1. Use fill=TRUE to fill with NA (NULL for list columns), or use.names=FALSE to ignore column names. use.names='check' (default from v1.12.2) emits this message and proceeds as if use.names=FALSE for backwards compatibility. See news item 5 in v1.12.2 for options to control this message.
# ...1 ...2 ...3 ...4
# <char> <char> <char> <lgcl>
# 1: p 1 <NA> NA
# 2: w 1 <NA> NA
# 3: s 1 <NA> NA
# 4: <NA> <NA> <NA> NA
要消除该提示消息,请改用 rbindlist(..., use.names=FALSE)。
我正在将混乱的 excel sheet 转换为可用数据。基本结构是重复的变量块,行中有相似的项目。我想移动列,使它们成为第一个列块的行。
这很难用文字描述,所以这里给出一个最小可复现示例(MWE)。
d1 = structure(list(...1 = c("p", "w", "s", NA, "A Ex", NA, "Pres",
"Time", "HR"), ...2 = c("1", "1", "1", NA, "Walk", NA, NA, NA,
NA), ...3 = c(NA, NA, NA, NA, NA, NA, "Done", "Time", "HR"),
...4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA), ...5 = c("p",
"w", "s", NA, "A Ex", NA, "Pres", "Time", "HR"), ...6 = c("1",
"1", "2", NA, "Walk", NA, NA, NA, NA), ...7 = c(NA, NA, NA,
NA, NA, NA, "Done", "Time", "HR"), ...8 = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA), ...9 = c("p", "w", "s", NA, "A Ex",
NA, "Pres", "Time", "HR"), ...10 = c("1", "1", "3", NA, "Walk",
NA, NA, NA, NA), ...11 = c(NA, NA, NA, NA, NA, NA, "Done",
"Time", "HR"), ...12 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA)), row.names = c(NA, -9L), class = c("tbl_df", "tbl",
"data.frame"))
我想移动列 5:8 和 9:12,使它们成为列 1:4 的额外行,所以期望的结果是:
df2 = structure(list(...1 = c("p", "w", "s", NA, "A Ex", NA, "Pres",
"Time", "HR", "p", "w", "s", NA, "A Ex", NA, "Pres", "Time",
"HR", "p", "w", "s", NA, "A Ex", NA, "Pres", "Time", "HR"), ...2 = c("1",
"1", "1", NA, "Walk", NA, NA, NA, NA, "1", "1", "2", NA, "Walk",
NA, NA, NA, NA, "1", "1", "3", NA, "Walk", NA, NA, NA, NA), ...3 = c(NA,
NA, NA, NA, NA, NA, "Done", "Time", "HR", NA, NA, NA, NA, NA,
NA, "Done", "Time", "HR", NA, NA, NA, NA, NA, NA, "Done", "Time",
"HR"), ...4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA,
-27L), class = c("tbl_df", "tbl", "data.frame"))
基础 R
基本前提是按 4 列为一组拆分:
splitframes <- split.default(d1, (seq_along(d1)-1) %/% 4)
splitframes
# $`0`
# # A tibble: 9 x 4
# ...1 ...2 ...3 ...4
# <chr> <chr> <chr> <lgl>
# 1 p 1 <NA> NA
# 2 w 1 <NA> NA
# 3 s 1 <NA> NA
# 4 <NA> <NA> <NA> NA
# 5 A Ex Walk <NA> NA
# 6 <NA> <NA> <NA> NA
# 7 Pres <NA> Done NA
# 8 Time <NA> Time NA
# 9 HR <NA> HR NA
# $`1`
# # A tibble: 9 x 4
# ...1 ...2 ...3 ...4
# <chr> <chr> <chr> <lgl>
# 1 p 1 <NA> NA
# 2 w 1 <NA> NA
不幸的是,rbind.data.frame 要求所有参数具有相同的列名,而这里各组的列名并不相同:
lapply(splitframes, names)
# $`0`
# [1] "...1" "...2" "...3" "...4"
# $`1`
# [1] "...5" "...6" "...7" "...8"
# $`2`
# [1] "...9" "...10" "...11" "...12"
所以我们需要先解决这个问题:
splitframes <- lapply(splitframes, `colnames<-`, names(splitframes[[1]]))
do.call(rbind, splitframes)
# # A tibble: 27 x 4
# ...1 ...2 ...3 ...4
# * <chr> <chr> <chr> <lgl>
# 1 p 1 <NA> NA
# 2 w 1 <NA> NA
# 3 s 1 <NA> NA
# 4 <NA> <NA> <NA> NA
# 5 A Ex Walk <NA> NA
# 6 <NA> <NA> <NA> NA
# 7 Pres <NA> Done NA
# 8 Time <NA> Time NA
# 9 HR <NA> HR NA
# 10 p 1 <NA> NA
# # ... with 17 more rows
dplyr
这个包更友好一些:
splitframes <- split.default(d1, (seq_along(d1)-1) %/% 4)
dplyr::bind_rows(splitframes)
# New names:
# * ...5 -> ...1
# * ...6 -> ...2
# * ...7 -> ...3
# * ...8 -> ...4
# New names:
# * ...9 -> ...1
# * ...10 -> ...2
# * ...11 -> ...3
# * ...12 -> ...4
# # A tibble: 27 x 4
# ...1 ...2 ...3 ...4
# <chr> <chr> <chr> <lgl>
# 1 p 1 <NA> NA
# 2 w 1 <NA> NA
# 3 s 1 <NA> NA
# 4 <NA> <NA> <NA> NA
# 5 A Ex Walk <NA> NA
# 6 <NA> <NA> <NA> NA
# 7 Pres <NA> Done NA
# 8 Time <NA> Time NA
# 9 HR <NA> HR NA
# 10 p 1 <NA> NA
# # ... with 17 more rows
data.table
splitframes <- split.default(d1, (seq_along(d1)-1) %/% 4)
library(data.table)
rbindlist(splitframes)
# Column 1 ['...5'] of item 2 is missing in item 1. Use fill=TRUE to fill with NA (NULL for list columns), or use.names=FALSE to ignore column names. use.names='check' (default from v1.12.2) emits this message and proceeds as if use.names=FALSE for backwards compatibility. See news item 5 in v1.12.2 for options to control this message.
# ...1 ...2 ...3 ...4
# <char> <char> <char> <lgcl>
# 1: p 1 <NA> NA
# 2: w 1 <NA> NA
# 3: s 1 <NA> NA
# 4: <NA> <NA> <NA> NA
要消除该提示消息,请改用 rbindlist(..., use.names=FALSE)。