在 R 中重塑 data.frame
reshape a data.frame in R
我在 R 中有一个 data.frame。
# Example input in wide format: one row per customer (custid). Each product
# slot N contributes three columns: prodN (a label such as "jeans"),
# prodN_hnode1 and prodN_hnode2.
df <- data.frame(
  custid = c(1, 2, 3, 4),
  prod1 = c("jeans", "tshirt", "jacket", "tshirt"),
  prod1_hnode1 = c(1, 2, 3, 2),
  prod1_hnode2 = c(6, 7, 8, 7),
  prod2 = c("tshirt", "jeans", "jacket", "shirt"),
  prod2_hnode1 = c(2, 1, 3, 4),
  prod2_hnode2 = c(7, 6, 8, 7)
)
> df
custid prod1 prod1_hnode1 prod1_hnode2 prod2 prod2_hnode1 prod2_hnode2
1 1 jeans 1 6 tshirt 2 7
2 2 tshirt 2 7 jeans 1 6
3 3 jacket 3 8 jacket 3 8
4 4 tshirt 2 7 shirt 4 7
我怎样才能将它重塑为?
custid prod rec hnode1 hnode2
1 1 prod1 jeans 1 6
2 1 prod2 tshirt 2 7
3 2 prod1 tshirt 2 7
4 2 prod2 jeans 1 6
5 3 prod1 jacket 3 8
6 3 prod2 jacket 3 8
7 4 prod1 tshirt 2 7
8 4 prod2 shirt 4 7
刚刚在 python
中得到了如何执行此操作的答案。也对 R
解决方案感到好奇。
我们可以通过 data.table
中的 melt
来做到这一点
library(data.table)

# Wide -> long with data.table's multi-column melt: every regex handed to
# patterns() selects one family of columns, and each family is stacked into
# its own value column (rec, hnode1, hnode2).
# The backslash has to be doubled inside an R string literal: "\d" is rejected
# by the parser as an unrecognized escape, whereas "\\d" reaches the regex
# engine as \d (a digit).
# NOTE: setDT() converts df to a data.table by reference, so df itself is a
# data.table after this call.
# melt() labels the stacked column groups 1, 2, ... in `prod`; the chained
# step rebuilds the "prodN" labels, and the last step sorts by customer.
melt(
  setDT(df),
  measure.vars = patterns("^prod\\d+$", "hnode1", "hnode2"),
  value.name = c("rec", "hnode1", "hnode2"),
  variable.name = "prod"
)[, prod := paste0("prod", prod)][order(custid)]
# custid prod rec hnode1 hnode2
#1: 1 prod1 jeans 1 6
#2: 1 prod2 tshirt 2 7
#3: 2 prod1 tshirt 2 7
#4: 2 prod2 jeans 1 6
#5: 3 prod1 jacket 3 8
#6: 3 prod2 jacket 3 8
#7: 4 prod1 tshirt 2 7
#8: 4 prod2 shirt 4 7
另一种方法是使用基础 R 的 reshape
函数。
尝试:
# Base R wide -> long: each element of `varying` names the wide columns that
# are stacked into one long column, and `times` supplies the label stored in
# the generated time column for each stacked slot.
long <- reshape(
  df,
  direction = "long",
  idvar = "custid",
  varying = list(
    c("prod1", "prod2"),
    c("prod1_hnode1", "prod2_hnode1"),
    c("prod1_hnode2", "prod2_hnode2")
  ),
  sep = "",
  times = c("prod1", "prod2")
)
此时你已经大功告成了,但你还可以美化你的行名和列名:
# Cosmetic clean-up: give the columns their final names and reset the row
# names to the default 1..n sequence, then print the result.
names(long) <- c("custid", "prod", "rec", "hnode1", "hnode2")
row.names(long) <- NULL
long
# custid prod rec hnode1 hnode2
# 1 1 prod1 jeans 1 6
# 2 2 prod1 tshirt 2 7
# 3 3 prod1 jacket 3 8
# 4 4 prod1 tshirt 2 7
# 5 1 prod2 tshirt 2 7
# 6 2 prod2 jeans 1 6
# 7 3 prod2 jacket 3 8
# 8 4 prod2 shirt 4 7
我实在想不出不涉及合并两个数据子集的 "tidyverse" 方法。下面的代码可以得到所需的输出:
library(tidyverse)

# Left half: the product labels, one row per (custid, prod) pair, sorted by
# customer.
left <- df %>%
  select(custid, prod1, prod2) %>%
  gather(key = "prod", value = "rec", -custid) %>%
  arrange(custid)

# Right half: the hnode values, reshaped so that hnode1 and hnode2 become
# columns with one row per (custid, product slot).
right <- df %>%
  select(custid, contains("node")) %>%
  gather(key = "var", value = "val", -custid) %>%
  # Strip everything up to the last underscore so only "hnode1" / "hnode2"
  # is left as the key.
  mutate(var = sub(".*_", "", var)) %>%
  group_by(custid, var) %>%
  # Number the values inside each (custid, var) group so that spread() gets a
  # unique row identifier.
  mutate(ind = row_number()) %>%
  spread(key = var, value = val) %>%
  ungroup() %>%
  select(-c(ind, custid))

# The two halves are combined purely by row position, so both must be in the
# same (custid, product slot) order.
cbind(left, right)
我在 R 中有一个 data.frame。
# Example input in wide format: one row per customer (custid). Each product
# slot N contributes three columns: prodN (a label such as "jeans"),
# prodN_hnode1 and prodN_hnode2.
df <- data.frame(
  custid = c(1, 2, 3, 4),
  prod1 = c("jeans", "tshirt", "jacket", "tshirt"),
  prod1_hnode1 = c(1, 2, 3, 2),
  prod1_hnode2 = c(6, 7, 8, 7),
  prod2 = c("tshirt", "jeans", "jacket", "shirt"),
  prod2_hnode1 = c(2, 1, 3, 4),
  prod2_hnode2 = c(7, 6, 8, 7)
)
> df
custid prod1 prod1_hnode1 prod1_hnode2 prod2 prod2_hnode1 prod2_hnode2
1 1 jeans 1 6 tshirt 2 7
2 2 tshirt 2 7 jeans 1 6
3 3 jacket 3 8 jacket 3 8
4 4 tshirt 2 7 shirt 4 7
我怎样才能将它重塑为?
custid prod rec hnode1 hnode2
1 1 prod1 jeans 1 6
2 1 prod2 tshirt 2 7
3 2 prod1 tshirt 2 7
4 2 prod2 jeans 1 6
5 3 prod1 jacket 3 8
6 3 prod2 jacket 3 8
7 4 prod1 tshirt 2 7
8 4 prod2 shirt 4 7
刚刚在 python
中得到了如何执行此操作的答案。也对 R
解决方案感到好奇。
我们可以通过 data.table
中的 melt
来做到这一点
library(data.table)

# Wide -> long with data.table's multi-column melt: every regex handed to
# patterns() selects one family of columns, and each family is stacked into
# its own value column (rec, hnode1, hnode2).
# The backslash has to be doubled inside an R string literal: "\d" is rejected
# by the parser as an unrecognized escape, whereas "\\d" reaches the regex
# engine as \d (a digit).
# NOTE: setDT() converts df to a data.table by reference, so df itself is a
# data.table after this call.
# melt() labels the stacked column groups 1, 2, ... in `prod`; the chained
# step rebuilds the "prodN" labels, and the last step sorts by customer.
melt(
  setDT(df),
  measure.vars = patterns("^prod\\d+$", "hnode1", "hnode2"),
  value.name = c("rec", "hnode1", "hnode2"),
  variable.name = "prod"
)[, prod := paste0("prod", prod)][order(custid)]
# custid prod rec hnode1 hnode2
#1: 1 prod1 jeans 1 6
#2: 1 prod2 tshirt 2 7
#3: 2 prod1 tshirt 2 7
#4: 2 prod2 jeans 1 6
#5: 3 prod1 jacket 3 8
#6: 3 prod2 jacket 3 8
#7: 4 prod1 tshirt 2 7
#8: 4 prod2 shirt 4 7
另一种方法是使用基础 R 的 reshape
函数。
尝试:
# Base R wide -> long: each element of `varying` names the wide columns that
# are stacked into one long column, and `times` supplies the label stored in
# the generated time column for each stacked slot.
long <- reshape(
  df,
  direction = "long",
  idvar = "custid",
  varying = list(
    c("prod1", "prod2"),
    c("prod1_hnode1", "prod2_hnode1"),
    c("prod1_hnode2", "prod2_hnode2")
  ),
  sep = "",
  times = c("prod1", "prod2")
)
此时你已经大功告成了,但你还可以美化你的行名和列名:
# Cosmetic clean-up: give the columns their final names and reset the row
# names to the default 1..n sequence, then print the result.
names(long) <- c("custid", "prod", "rec", "hnode1", "hnode2")
row.names(long) <- NULL
long
# custid prod rec hnode1 hnode2
# 1 1 prod1 jeans 1 6
# 2 2 prod1 tshirt 2 7
# 3 3 prod1 jacket 3 8
# 4 4 prod1 tshirt 2 7
# 5 1 prod2 tshirt 2 7
# 6 2 prod2 jeans 1 6
# 7 3 prod2 jacket 3 8
# 8 4 prod2 shirt 4 7
我实在想不出不涉及合并两个数据子集的 "tidyverse" 方法。下面的代码可以得到所需的输出:
library(tidyverse)

# Left half: the product labels, one row per (custid, prod) pair, sorted by
# customer.
left <- df %>%
  select(custid, prod1, prod2) %>%
  gather(key = "prod", value = "rec", -custid) %>%
  arrange(custid)

# Right half: the hnode values, reshaped so that hnode1 and hnode2 become
# columns with one row per (custid, product slot).
right <- df %>%
  select(custid, contains("node")) %>%
  gather(key = "var", value = "val", -custid) %>%
  # Strip everything up to the last underscore so only "hnode1" / "hnode2"
  # is left as the key.
  mutate(var = sub(".*_", "", var)) %>%
  group_by(custid, var) %>%
  # Number the values inside each (custid, var) group so that spread() gets a
  # unique row identifier.
  mutate(ind = row_number()) %>%
  spread(key = var, value = val) %>%
  ungroup() %>%
  select(-c(ind, custid))

# The two halves are combined purely by row position, so both must be in the
# same (custid, product slot) order.
cbind(left, right)