根据 R 中第一列中的重复行重塑
Reshape based on repeating rows in first column in R
我想知道如何重塑以下数据集:
这是生成以下内容的代码:
# Example input: every "Date" row opens a new block, and the rows that follow
# it hold the per-location values for the Monday and Tuesday columns.
# The result is bound to `df` because the answers below operate on an object
# of that name (unassigned, `df` would resolve to the stats::df() function).
df <- data.frame(
  Variable = c("Date", "Location_1", "Location_2",
               "Date", "Location_1", "Location_2"),
  Monday = c("7/1/20", "1", "2", "7/3/20", "1", "2"),
  Tuesday = c("7/2/20", "5", "2", "7/4/20", "5", "2")
)
df # still print the data frame, as the bare data.frame() call did
Variable Monday Tuesday
Date 7/1/20 7/2/20
Location_1 1 5
Location_2 2 2
Date 7/3/20 7/4/20
Location_1 1 5
Location_2 2 2
转换为:
Date Location_1 Location_2
7/1/20 1 2
7/2/20 5 2
7/3/20 1 2
7/4/20 5 2
我不确定为什么自己始终无法正确地使用转置,但这个问题似乎有一个我没有想到的简单解决方案。任何帮助将不胜感激。
谢谢!
先将数据转换为长格式,创建一个标识符列,然后再转换回宽格式。
library(dplyr)
library(tidyr)
# Stack the weekday columns, number the values within each Variable so that
# the n-th Date lines up with the n-th Location_* value, then spread the
# Variable labels back out into columns.
df %>%
  pivot_longer(cols = !Variable) %>% # long format: Variable, name, value
  select(!name) %>% # the weekday label is not needed afterwards
  group_by(Variable) %>%
  mutate(obs = seq_len(n())) %>% # running index within each Variable
  pivot_wider(names_from = Variable, values_from = value) %>%
  select(!obs) # drop the helper index
# Date Location_1 Location_2
# <chr> <chr> <chr>
#1 7/1/20 1 2
#2 7/2/20 5 2
#3 7/3/20 1 2
#4 7/4/20 5 2
在 data.table 中,可以使用 melt + dcast:
library(data.table)
# Melt the weekday columns into long form, then cast back to wide with one
# column per Variable; rowid(Variable) numbers the repeats of each label so
# the n-th Date is paired with the n-th Location_* value.
# Note: setDT() converts `df` to a data.table by reference.
dcast(
  data = melt(data = setDT(df), id.vars = "Variable"),
  formula = rowid(Variable) ~ Variable,
  value.var = "value"
)
使用 base R 你可以这样做:
# Transpose so that each weekday becomes a row; the first row of the transpose
# holds the Variable labels themselves and is dropped.
df_t <- as.data.frame(t(df))[-1, ]
# The labels repeat once per date block, and reshape() looks columns up by
# name, so duplicated names would make every block resolve to the first
# matching column. make.unique() appends ".1", ".2", ... to the repeats.
names(df_t) <- make.unique(as.character(df$Variable))
# Stack the blocks: each `varying` element lists the columns feeding one
# output variable, in block order. The patterns are anchored and allow only
# the optional make.unique() suffix, so "Location_1" cannot match another label.
reshape(df_t,
        varying = list(
          grep("^Date(\\.[0-9]+)?$", names(df_t), value = TRUE),
          grep("^Location_1(\\.[0-9]+)?$", names(df_t), value = TRUE),
          grep("^Location_2(\\.[0-9]+)?$", names(df_t), value = TRUE)
        ),
        v.names = c("Date", "Location_1", "Location_2"),
        direction = "long")
不过,这会额外添加 time 和 id 两个变量;如果您想知道新行来自哪个日期块,它们可能会有用。
我们也可以先用 group_split 拆分,然后再重塑(reshape):
library(dplyr)
library(purrr)
library(tidyr)
library(janitor)
# Start a new group at every "Date" row, promote that row to the column names
# of its group, stack the groups, and pivot so that each date becomes one row.
# Operates on `df`, the same object name the other answers use.
df %>%
  # `.keep = FALSE` drops the helper `grp` column from the pieces; the
  # un-dotted `keep` spelling is deprecated in dplyr.
  group_split(grp = cumsum(Variable == "Date"), .keep = FALSE) %>%
  # Row 1 of each piece (the "Date" row) becomes that piece's header.
  map_dfr(~ .x %>%
            row_to_names(row_number = 1)) %>%
  # The pieces carry different date columns, so stacking leaves NAs to drop.
  pivot_longer(cols = -Date, values_drop_na = TRUE) %>%
  pivot_wider(names_from = Date, values_from = value)
我想知道如何重塑以下数据集:
这是生成以下内容的代码:
# Example input: every "Date" row opens a new block, and the rows that follow
# it hold the per-location values for the Monday and Tuesday columns.
# The result is bound to `df` because the answers below operate on an object
# of that name (unassigned, `df` would resolve to the stats::df() function).
df <- data.frame(
  Variable = c("Date", "Location_1", "Location_2",
               "Date", "Location_1", "Location_2"),
  Monday = c("7/1/20", "1", "2", "7/3/20", "1", "2"),
  Tuesday = c("7/2/20", "5", "2", "7/4/20", "5", "2")
)
df # still print the data frame, as the bare data.frame() call did
Variable Monday Tuesday
Date 7/1/20 7/2/20
Location_1 1 5
Location_2 2 2
Date 7/3/20 7/4/20
Location_1 1 5
Location_2 2 2
转换为:
Date Location_1 Location_2
7/1/20 1 2
7/2/20 5 2
7/3/20 1 2
7/4/20 5 2
我不确定为什么自己始终无法正确地使用转置,但这个问题似乎有一个我没有想到的简单解决方案。任何帮助将不胜感激。
谢谢!
先将数据转换为长格式,创建一个标识符列,然后再转换回宽格式。
library(dplyr)
library(tidyr)
# Stack the weekday columns, number the values within each Variable so that
# the n-th Date lines up with the n-th Location_* value, then spread the
# Variable labels back out into columns.
df %>%
  pivot_longer(cols = !Variable) %>% # long format: Variable, name, value
  select(!name) %>% # the weekday label is not needed afterwards
  group_by(Variable) %>%
  mutate(obs = seq_len(n())) %>% # running index within each Variable
  pivot_wider(names_from = Variable, values_from = value) %>%
  select(!obs) # drop the helper index
# Date Location_1 Location_2
# <chr> <chr> <chr>
#1 7/1/20 1 2
#2 7/2/20 5 2
#3 7/3/20 1 2
#4 7/4/20 5 2
在 data.table 中,可以使用 melt + dcast:
library(data.table)
# Melt the weekday columns into long form, then cast back to wide with one
# column per Variable; rowid(Variable) numbers the repeats of each label so
# the n-th Date is paired with the n-th Location_* value.
# Note: setDT() converts `df` to a data.table by reference.
dcast(
  data = melt(data = setDT(df), id.vars = "Variable"),
  formula = rowid(Variable) ~ Variable,
  value.var = "value"
)
使用 base R 你可以这样做:
# Transpose so that each weekday becomes a row; the first row of the transpose
# holds the Variable labels themselves and is dropped.
df_t <- as.data.frame(t(df))[-1, ]
# The labels repeat once per date block, and reshape() looks columns up by
# name, so duplicated names would make every block resolve to the first
# matching column. make.unique() appends ".1", ".2", ... to the repeats.
names(df_t) <- make.unique(as.character(df$Variable))
# Stack the blocks: each `varying` element lists the columns feeding one
# output variable, in block order. The patterns are anchored and allow only
# the optional make.unique() suffix, so "Location_1" cannot match another label.
reshape(df_t,
        varying = list(
          grep("^Date(\\.[0-9]+)?$", names(df_t), value = TRUE),
          grep("^Location_1(\\.[0-9]+)?$", names(df_t), value = TRUE),
          grep("^Location_2(\\.[0-9]+)?$", names(df_t), value = TRUE)
        ),
        v.names = c("Date", "Location_1", "Location_2"),
        direction = "long")
不过,这会额外添加 time 和 id 两个变量;如果您想知道新行来自哪个日期块,它们可能会有用。
我们也可以先用 group_split 拆分,然后再重塑(reshape):
library(dplyr)
library(purrr)
library(tidyr)
library(janitor)
# Start a new group at every "Date" row, promote that row to the column names
# of its group, stack the groups, and pivot so that each date becomes one row.
# Operates on `df`, the same object name the other answers use.
df %>%
  # `.keep = FALSE` drops the helper `grp` column from the pieces; the
  # un-dotted `keep` spelling is deprecated in dplyr.
  group_split(grp = cumsum(Variable == "Date"), .keep = FALSE) %>%
  # Row 1 of each piece (the "Date" row) becomes that piece's header.
  map_dfr(~ .x %>%
            row_to_names(row_number = 1)) %>%
  # The pieces carry different date columns, so stacking leaves NAs to drop.
  pivot_longer(cols = -Date, values_drop_na = TRUE) %>%
  pivot_wider(names_from = Date, values_from = value)