将多列合并为一列时保留列信息
Retaining column information when melting multiple columns into one
我有一个 data.table,并按如下方式对它做了 melt(宽表转长表):
library(data.table)
# Wide example table: one row per ID, with one indicator column per event
# category (Event_A, Event_B).
DT <- fread(
"ID country year Event_A Event_B
4 NLD 2002 0 1
5 NLD 2002 0 1
6 NLD 2006 1 1
7 NLD 2006 1 0
8 NLD 2006 1 1
9 GBR 2002 0 1
10 GBR 2002 0 0
11 GBR 2002 0 1
12 GBR 2006 1 1
13 GBR 2006 1 1",
header = TRUE)

# Stack the two event columns into a single `Event` column. Only the measure
# columns are named, so every other column (ID, country, year) is kept as an
# id column. Dropping `variable` discards which source column each value came
# from, which is exactly the information the question wants to keep.
melt(
  DT,
  measure.vars = c("Event_A", "Event_B"),
  value.name = "Event"
)[, variable := NULL][order(ID)]
# ID country year Event
# 1: 4 NLD 2002 0
# 2: 4 NLD 2002 1
# 3: 5 NLD 2002 0
# 4: 5 NLD 2002 1
# 5: 6 NLD 2006 1
# 6: 6 NLD 2006 1
# 7: 7 NLD 2006 1
# 8: 7 NLD 2006 0
# 9: 8 NLD 2006 1
#10: 8 NLD 2006 1
#11: 9 GBR 2002 0
#12: 9 GBR 2002 1
#13: 10 GBR 2002 0
#14: 10 GBR 2002 0
#15: 11 GBR 2002 0
#16: 11 GBR 2002 1
#17: 12 GBR 2006 1
#18: 12 GBR 2006 1
#19: 13 GBR 2006 1
#20: 13 GBR 2006 1
然而事后我发现,我希望在 melt 之后的数据集中保留事件类别(即原列名中的 A / B)。怎样才能确保这一信息保留在融化后的数据中?
编辑(原帖中的示例过于简化,补充更接近实际的数据):
# Extended wide example: each ID now carries two column families, Event_* and
# Choice_*, each with an A and a B member.
DT <- fread(
"ID country year Event_A Event_B Choice_A Choice_B
4 NLD 2002 0 1 0 1
5 NLD 2002 0 1 1 1
6 NLD 2006 1 1 0 1
7 NLD 2006 1 0 1 1
8 NLD 2006 1 1 1 1
9 GBR 2002 0 1 1 0
10 GBR 2002 0 0 1 1
11 GBR 2002 0 1 0 1
12 GBR 2006 1 1 1 1
13 GBR 2006 1 1 0 0",
header = TRUE)

# Melt both families at once: one regex per output value column.
DT <- melt(
  DT,
  measure.vars = patterns("^Event_", "^Choice_"),
  value.name = c("Event", "Choice")
)
# Drop the column that records which family member each row came from, then
# sort the rows by ID.
DT <- DT[, variable := NULL][order(ID)]
期望的输出:
# ID country year Event Event_Cat Choice Choice_Cat
# 1: 4 NLD 2002 0 A 0 A
# 2: 4 NLD 2002 1 B 1 B
# 3: 5 NLD 2002 0 A
# 4: 5 NLD 2002 1 B
# 5: 6 NLD 2006 1 A
# 6: 6 NLD 2006 1 B
# 7: 7 NLD 2006 1
# 8: 7 NLD 2006 0
# 9: 8 NLD 2006 1
#10: 8 NLD 2006 1
#11: 9 GBR 2002 0
#12: 9 GBR 2002 1
#13: 10 GBR 2002 0
#14: 10 GBR 2002 0
#15: 11 GBR 2002 0
#16: 11 GBR 2002 1
#17: 12 GBR 2006 1
#18: 12 GBR 2006 1
#19: 13 GBR 2006 1
#20: 13 GBR 2006 1
您可以使用 tidyr 中的 pivot_longer:
# Reshape the Event_* columns to long form. Each column name is split at "_":
# the part before it names the value column (".value" -> Event) and the part
# after it is stored in Event_Cat.
tidyr::pivot_longer(
  DT,
  cols = starts_with("Event"),
  names_sep = "_",
  names_to = c(".value", "Event_Cat")
)
# ID country year Event_Cat Event
# <int> <chr> <int> <chr> <int>
# 1 4 NLD 2002 A 0
# 2 4 NLD 2002 B 1
# 3 5 NLD 2002 A 0
# 4 5 NLD 2002 B 1
# 5 6 NLD 2006 A 1
# 6 6 NLD 2006 B 1
# 7 7 NLD 2006 A 1
# 8 7 NLD 2006 B 0
# 9 8 NLD 2006 A 1
#10 8 NLD 2006 B 1
#11 9 GBR 2002 A 0
#12 9 GBR 2002 B 1
#13 10 GBR 2002 A 0
#14 10 GBR 2002 B 0
#15 11 GBR 2002 A 0
#16 11 GBR 2002 B 1
#17 12 GBR 2006 A 1
#18 12 GBR 2006 B 1
#19 13 GBR 2006 A 1
#20 13 GBR 2006 B 1
不要将 variable 列设为 NULL(它对应 melt 的 variable.name 参数),保留该列并把它命名为所需的列名即可:
# Melt Event_A / Event_B into one `Event` column and keep the source column as
# `Event_Cat`. melt()'s `variable.name` argument names that column directly, so
# no setnames() call is needed, and the chained result prints at top level
# (setnames() returns its value invisibly). Argument names are spelled out in
# full rather than relying on partial matching.
melt(
  DT,
  id.vars = setdiff(names(DT), c("Event_A", "Event_B")),
  variable.name = "Event_Cat",
  value.name = "Event"
)[, Event_Cat := sub("^Event_", "", Event_Cat)][order(ID)]  # "Event_A" -> "A"
ID country year Event_Cat Event
1: 4 NLD 2002 A 0
2: 4 NLD 2002 B 1
3: 5 NLD 2002 A 0
4: 5 NLD 2002 B 1
5: 6 NLD 2006 A 1
6: 6 NLD 2006 B 1 ...
编辑:根据问题中补充的新信息(同时融化多组列),做法如下。
# Melt the Event_* and Choice_* families together and keep the category.
# With several patterns, melt() records the position of each column within its
# family as a factor (levels "1", "2"); those levels are relabelled "A" / "B".
# `variable.name` names the column directly (fixing the earlier "Cetegory"
# misspelling) so no setnames() call is needed.
# NOTE(review): assumes DT is still the wide table and that the A column
# precedes the B column within each family -- confirm before reuse.
DT2 <- melt(
  DT,
  measure.vars = patterns("^Event_", "^Choice_"),
  variable.name = "Category",
  value.name = c("Event", "Choice")
)[, Category := forcats::lvls_revalue(Category, c("A", "B"))][order(ID)]
DT2
# ID country year Category Event Choice
# 1: 4 NLD 2002 A 0 0
# 2: 4 NLD 2002 B 1 1
# 3: 5 NLD 2002 A 0 1
# 4: 5 NLD 2002 B 1 1
# 5: 6 NLD 2006 A 1 0
# 6: 6 NLD 2006 B 1 1 ...
我有一个 data.table,并按如下方式对它做了 melt(宽表转长表):
library(data.table)
# Wide example table: one row per ID, with one indicator column per event
# category (Event_A, Event_B).
DT <- fread(
"ID country year Event_A Event_B
4 NLD 2002 0 1
5 NLD 2002 0 1
6 NLD 2006 1 1
7 NLD 2006 1 0
8 NLD 2006 1 1
9 GBR 2002 0 1
10 GBR 2002 0 0
11 GBR 2002 0 1
12 GBR 2006 1 1
13 GBR 2006 1 1",
header = TRUE)

# Stack the two event columns into a single `Event` column. Only the measure
# columns are named, so every other column (ID, country, year) is kept as an
# id column. Dropping `variable` discards which source column each value came
# from, which is exactly the information the question wants to keep.
melt(
  DT,
  measure.vars = c("Event_A", "Event_B"),
  value.name = "Event"
)[, variable := NULL][order(ID)]
# ID country year Event
# 1: 4 NLD 2002 0
# 2: 4 NLD 2002 1
# 3: 5 NLD 2002 0
# 4: 5 NLD 2002 1
# 5: 6 NLD 2006 1
# 6: 6 NLD 2006 1
# 7: 7 NLD 2006 1
# 8: 7 NLD 2006 0
# 9: 8 NLD 2006 1
#10: 8 NLD 2006 1
#11: 9 GBR 2002 0
#12: 9 GBR 2002 1
#13: 10 GBR 2002 0
#14: 10 GBR 2002 0
#15: 11 GBR 2002 0
#16: 11 GBR 2002 1
#17: 12 GBR 2006 1
#18: 12 GBR 2006 1
#19: 13 GBR 2006 1
#20: 13 GBR 2006 1
然而事后我发现,我希望在 melt 之后的数据集中保留事件类别(即原列名中的 A / B)。怎样才能确保这一信息保留在融化后的数据中?
编辑(原帖中的示例过于简化,补充更接近实际的数据):
# Extended wide example: each ID now carries two column families, Event_* and
# Choice_*, each with an A and a B member.
DT <- fread(
"ID country year Event_A Event_B Choice_A Choice_B
4 NLD 2002 0 1 0 1
5 NLD 2002 0 1 1 1
6 NLD 2006 1 1 0 1
7 NLD 2006 1 0 1 1
8 NLD 2006 1 1 1 1
9 GBR 2002 0 1 1 0
10 GBR 2002 0 0 1 1
11 GBR 2002 0 1 0 1
12 GBR 2006 1 1 1 1
13 GBR 2006 1 1 0 0",
header = TRUE)

# Melt both families at once: one regex per output value column.
DT <- melt(
  DT,
  measure.vars = patterns("^Event_", "^Choice_"),
  value.name = c("Event", "Choice")
)
# Drop the column that records which family member each row came from, then
# sort the rows by ID.
DT <- DT[, variable := NULL][order(ID)]
期望的输出:
# ID country year Event Event_Cat Choice Choice_Cat
# 1: 4 NLD 2002 0 A 0 A
# 2: 4 NLD 2002 1 B 1 B
# 3: 5 NLD 2002 0 A
# 4: 5 NLD 2002 1 B
# 5: 6 NLD 2006 1 A
# 6: 6 NLD 2006 1 B
# 7: 7 NLD 2006 1
# 8: 7 NLD 2006 0
# 9: 8 NLD 2006 1
#10: 8 NLD 2006 1
#11: 9 GBR 2002 0
#12: 9 GBR 2002 1
#13: 10 GBR 2002 0
#14: 10 GBR 2002 0
#15: 11 GBR 2002 0
#16: 11 GBR 2002 1
#17: 12 GBR 2006 1
#18: 12 GBR 2006 1
#19: 13 GBR 2006 1
#20: 13 GBR 2006 1
您可以使用 tidyr 中的 pivot_longer:
# Reshape the Event_* columns to long form. Each column name is split at "_":
# the part before it names the value column (".value" -> Event) and the part
# after it is stored in Event_Cat.
tidyr::pivot_longer(
  DT,
  cols = starts_with("Event"),
  names_sep = "_",
  names_to = c(".value", "Event_Cat")
)
# ID country year Event_Cat Event
# <int> <chr> <int> <chr> <int>
# 1 4 NLD 2002 A 0
# 2 4 NLD 2002 B 1
# 3 5 NLD 2002 A 0
# 4 5 NLD 2002 B 1
# 5 6 NLD 2006 A 1
# 6 6 NLD 2006 B 1
# 7 7 NLD 2006 A 1
# 8 7 NLD 2006 B 0
# 9 8 NLD 2006 A 1
#10 8 NLD 2006 B 1
#11 9 GBR 2002 A 0
#12 9 GBR 2002 B 1
#13 10 GBR 2002 A 0
#14 10 GBR 2002 B 0
#15 11 GBR 2002 A 0
#16 11 GBR 2002 B 1
#17 12 GBR 2006 A 1
#18 12 GBR 2006 B 1
#19 13 GBR 2006 A 1
#20 13 GBR 2006 B 1
不要将 variable 列设为 NULL(它对应 melt 的 variable.name 参数),保留该列并把它命名为所需的列名即可:
# Melt Event_A / Event_B into one `Event` column and keep the source column as
# `Event_Cat`. melt()'s `variable.name` argument names that column directly, so
# no setnames() call is needed, and the chained result prints at top level
# (setnames() returns its value invisibly). Argument names are spelled out in
# full rather than relying on partial matching.
melt(
  DT,
  id.vars = setdiff(names(DT), c("Event_A", "Event_B")),
  variable.name = "Event_Cat",
  value.name = "Event"
)[, Event_Cat := sub("^Event_", "", Event_Cat)][order(ID)]  # "Event_A" -> "A"
ID country year Event_Cat Event
1: 4 NLD 2002 A 0
2: 4 NLD 2002 B 1
3: 5 NLD 2002 A 0
4: 5 NLD 2002 B 1
5: 6 NLD 2006 A 1
6: 6 NLD 2006 B 1 ...
编辑:根据问题中补充的新信息(同时融化多组列),做法如下。
# Melt the Event_* and Choice_* families together and keep the category.
# With several patterns, melt() records the position of each column within its
# family as a factor (levels "1", "2"); those levels are relabelled "A" / "B".
# `variable.name` names the column directly (fixing the earlier "Cetegory"
# misspelling) so no setnames() call is needed.
# NOTE(review): assumes DT is still the wide table and that the A column
# precedes the B column within each family -- confirm before reuse.
DT2 <- melt(
  DT,
  measure.vars = patterns("^Event_", "^Choice_"),
  variable.name = "Category",
  value.name = c("Event", "Choice")
)[, Category := forcats::lvls_revalue(Category, c("A", "B"))][order(ID)]
DT2
# ID country year Category Event Choice
# 1: 4 NLD 2002 A 0 0
# 2: 4 NLD 2002 B 1 1
# 3: 5 NLD 2002 A 0 1
# 4: 5 NLD 2002 B 1 1
# 5: 6 NLD 2006 A 1 0
# 6: 6 NLD 2006 B 1 1 ...