data.table:对列名格式特殊的表进行 melt
Data table, melt with unusual formatted tables
我有一个 data.table 类似于以下内容:
# Example wide table: the COUNT/RATE column names carry an "(x:y)" suffix and
# the INDICATOR column names carry a "(y)" suffix.
data.table(`COLOUR` = c("red", "blue", "green"),
  `COUNT(1:1)` = 5:7,
  `COUNT(2:1)` = 1:3,
  `COUNT(1:2)` = 1:3,
  `COUNT(2:2)` = 6:8,
  `RATE(1:1)` = 1/(1:3),
  `RATE(2:1)` = 2/(2:4),
  `RATE(1:2)` = 3/(4:6),
  `RATE(2:2)` = 1,
  `INDICATOR(2)` = c("left", "lefter", "leftest"),
  `INDICATOR(1)` = c("right", "righter", "more right"))
我需要melt
它来提供类似于下面的长结果:
# Expected long-format result: one row per COLOUR / x / y combination, with
# COUNT and RATE taken from the "(x:y)" columns of the input table and
# INDICATOR taken from the "(y)" column. RATE values are shown rounded.
# The `.internal.selfref = <pointer: ...>` entry of the printed structure is
# left out because it is not parseable R.
structure(list(COLOUR = c("red", "red", "red", "red", "blue",
"blue", "blue", "blue", "green", "green", "green", "green"),
x = c(1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2), y = c(1, 2, 1,
2, 1, 2, 1, 2, 1, 2, 1, 2), INDICATOR = c("right", "left",
"right", "left", "righter", "lefter", "righter", "lefter",
"more right", "leftest", "more right", "leftest"), COUNT = c(5,
1, 1, 6, 6, 2, 2, 7, 7, 3, 3, 8), RATE = c(1, 0.75, 1, 1,
0.5, 0.6, 0.6667, 1, 0.333, 0.5, 0.5, 1)), row.names = c(NA,
-12L), class = c("data.table", "data.frame"))
您会注意到原始数据集中的列在其列名中存储了两个变量,用于 COUNT
和 RATE
:x
和 y
值, INDICATOR
列在其列名中存储 y
值。
我希望为此使用 melt
,我怀疑需要多次调用它,但我不太确定最好的方法是什么。
我们可以分两步完成
library(data.table)

# Step 1: melt the COUNT/RATE columns, then split names such as "COUNT(1:1)"
# on "(", ")" and ":" into the measure name, x and y.
nm1 <- names(dt1)[!grepl("^INDICATOR", names(dt1))]
dt2 <- melt(dt1[, ..nm1], id.vars = "COLOUR")[,
  c("variable", "x", "y") := tstrsplit(variable, "[():]")][]

# Step 2: melt the INDICATOR columns and pull y out of names such as
# "INDICATOR(2)". Backslashes are doubled because the regex is written inside
# an R string literal.
dt3 <- melt(dt1[, .SD, .SDcols = patterns("^(COLOUR|INDICATOR)")],
  id.vars = "COLOUR", value.name = "INDICATOR")[,
  y := sub(".*\\((\\d+)\\)", "\\1", variable)][]

# Join the two long tables on COLOUR and y, spread the measure names back into
# columns, and order rows by the listed colour order, then x and y.
dcast(dt2[dt3, on = .(COLOUR, y)], COLOUR + INDICATOR + x + y ~
  variable, value.var = "value")[
  order(factor(COLOUR, levels = c("red", "blue", "green")), x, y)]
# COLOUR INDICATOR x y COUNT RATE
# 1: red right 1 1 5 1.0000000
# 2: red left 1 2 1 0.7500000
# 3: red right 2 1 1 1.0000000
# 4: red left 2 2 6 1.0000000
# 5: blue righter 1 1 6 0.5000000
# 6: blue lefter 1 2 2 0.6000000
# 7: blue righter 2 1 2 0.6666667
# 8: blue lefter 2 2 7 1.0000000
# 9: green more right 1 1 7 0.3333333
#10: green leftest 1 2 3 0.5000000
#11: green more right 2 1 3 0.5000000
#12: green leftest 2 2 8 1.0000000
我们可以将 pivot_longer
与 names_pattern
参数一起使用,这样我们在各自的列中就有了数据。我们可以 select INDICATOR
列基于 y
值。
library(dplyr)

# Reshape the COUNT/RATE columns: names such as "COUNT(1:2)" are split by
# names_pattern into the output column name (".value"), x and y. Backslashes
# are doubled because the regex is written inside an R string literal.
tidyr::pivot_longer(dt,
  cols = -c(COLOUR, starts_with("INDICATOR")),
  names_to = c(".value", "x", "y"),
  names_pattern = "(.*)\\((\\d+):(\\d+)\\)") %>%
  # Keep the INDICATOR value whose "(y)" suffix matches the row's y, then drop
  # the two wide INDICATOR columns.
  mutate(INDICATOR = ifelse(y == 1, `INDICATOR(1)`, `INDICATOR(2)`)) %>%
  select(-c(`INDICATOR(1)`, `INDICATOR(2)`))
# A tibble: 12 x 6
# COLOUR x y COUNT RATE INDICATOR
# <chr> <chr> <chr> <int> <dbl> <chr>
# 1 red 1 1 5 1 right
# 2 red 2 1 1 1 right
# 3 red 1 2 1 0.75 left
# 4 red 2 2 6 1 left
# 5 blue 1 1 6 0.5 righter
# 6 blue 2 1 2 0.667 righter
# 7 blue 1 2 2 0.6 lefter
# 8 blue 2 2 7 1 lefter
# 9 green 1 1 7 0.333 more right
#10 green 2 1 3 0.5 more right
#11 green 1 2 3 0.5 leftest
#12 green 2 2 8 1 leftest
我有一个 data.table 类似于以下内容:
# Example wide table: the COUNT/RATE column names carry an "(x:y)" suffix and
# the INDICATOR column names carry a "(y)" suffix.
data.table(`COLOUR` = c("red", "blue", "green"),
  `COUNT(1:1)` = 5:7,
  `COUNT(2:1)` = 1:3,
  `COUNT(1:2)` = 1:3,
  `COUNT(2:2)` = 6:8,
  `RATE(1:1)` = 1/(1:3),
  `RATE(2:1)` = 2/(2:4),
  `RATE(1:2)` = 3/(4:6),
  `RATE(2:2)` = 1,
  `INDICATOR(2)` = c("left", "lefter", "leftest"),
  `INDICATOR(1)` = c("right", "righter", "more right"))
我需要melt
它来提供类似于下面的长结果:
# Expected long-format result: one row per COLOUR / x / y combination, with
# COUNT and RATE taken from the "(x:y)" columns of the input table and
# INDICATOR taken from the "(y)" column. RATE values are shown rounded.
# The `.internal.selfref = <pointer: ...>` entry of the printed structure is
# left out because it is not parseable R.
structure(list(COLOUR = c("red", "red", "red", "red", "blue",
"blue", "blue", "blue", "green", "green", "green", "green"),
x = c(1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2), y = c(1, 2, 1,
2, 1, 2, 1, 2, 1, 2, 1, 2), INDICATOR = c("right", "left",
"right", "left", "righter", "lefter", "righter", "lefter",
"more right", "leftest", "more right", "leftest"), COUNT = c(5,
1, 1, 6, 6, 2, 2, 7, 7, 3, 3, 8), RATE = c(1, 0.75, 1, 1,
0.5, 0.6, 0.6667, 1, 0.333, 0.5, 0.5, 1)), row.names = c(NA,
-12L), class = c("data.table", "data.frame"))
您会注意到原始数据集中的列在其列名中存储了两个变量,用于 COUNT
和 RATE
:x
和 y
值, INDICATOR
列在其列名中存储 y
值。
我希望为此使用 melt
,我怀疑需要多次调用它,但我不太确定最好的方法是什么。
我们可以分两步完成
library(data.table)

# Step 1: melt the COUNT/RATE columns, then split names such as "COUNT(1:1)"
# on "(", ")" and ":" into the measure name, x and y.
nm1 <- names(dt1)[!grepl("^INDICATOR", names(dt1))]
dt2 <- melt(dt1[, ..nm1], id.vars = "COLOUR")[,
  c("variable", "x", "y") := tstrsplit(variable, "[():]")][]

# Step 2: melt the INDICATOR columns and pull y out of names such as
# "INDICATOR(2)". Backslashes are doubled because the regex is written inside
# an R string literal.
dt3 <- melt(dt1[, .SD, .SDcols = patterns("^(COLOUR|INDICATOR)")],
  id.vars = "COLOUR", value.name = "INDICATOR")[,
  y := sub(".*\\((\\d+)\\)", "\\1", variable)][]

# Join the two long tables on COLOUR and y, spread the measure names back into
# columns, and order rows by the listed colour order, then x and y.
dcast(dt2[dt3, on = .(COLOUR, y)], COLOUR + INDICATOR + x + y ~
  variable, value.var = "value")[
  order(factor(COLOUR, levels = c("red", "blue", "green")), x, y)]
# COLOUR INDICATOR x y COUNT RATE
# 1: red right 1 1 5 1.0000000
# 2: red left 1 2 1 0.7500000
# 3: red right 2 1 1 1.0000000
# 4: red left 2 2 6 1.0000000
# 5: blue righter 1 1 6 0.5000000
# 6: blue lefter 1 2 2 0.6000000
# 7: blue righter 2 1 2 0.6666667
# 8: blue lefter 2 2 7 1.0000000
# 9: green more right 1 1 7 0.3333333
#10: green leftest 1 2 3 0.5000000
#11: green more right 2 1 3 0.5000000
#12: green leftest 2 2 8 1.0000000
我们可以将 pivot_longer
与 names_pattern
参数一起使用,这样我们在各自的列中就有了数据。我们可以 select INDICATOR
列基于 y
值。
library(dplyr)

# Reshape the COUNT/RATE columns: names such as "COUNT(1:2)" are split by
# names_pattern into the output column name (".value"), x and y. Backslashes
# are doubled because the regex is written inside an R string literal.
tidyr::pivot_longer(dt,
  cols = -c(COLOUR, starts_with("INDICATOR")),
  names_to = c(".value", "x", "y"),
  names_pattern = "(.*)\\((\\d+):(\\d+)\\)") %>%
  # Keep the INDICATOR value whose "(y)" suffix matches the row's y, then drop
  # the two wide INDICATOR columns.
  mutate(INDICATOR = ifelse(y == 1, `INDICATOR(1)`, `INDICATOR(2)`)) %>%
  select(-c(`INDICATOR(1)`, `INDICATOR(2)`))
# A tibble: 12 x 6
# COLOUR x y COUNT RATE INDICATOR
# <chr> <chr> <chr> <int> <dbl> <chr>
# 1 red 1 1 5 1 right
# 2 red 2 1 1 1 right
# 3 red 1 2 1 0.75 left
# 4 red 2 2 6 1 left
# 5 blue 1 1 6 0.5 righter
# 6 blue 2 1 2 0.667 righter
# 7 blue 1 2 2 0.6 lefter
# 8 blue 2 2 7 1 lefter
# 9 green 1 1 7 0.333 more right
#10 green 2 1 3 0.5 more right
#11 green 1 2 3 0.5 leftest
#12 green 2 2 8 1 leftest