在 R 中导入交叉 table
Importing cross-table in R
我正在尝试将一个交叉表(cross table)导入 R 中以供使用。该表如下所示:
# Cross table from the question: the first column holds the row keys, the
# remaining columns hold the values for each column key.
# data.frame() repairs the non-syntactic headers, so the resulting column
# names are X0, X15 and X30.
df <- data.frame(
  `0` = c(0.1, 0.2, 0.3),
  `15` = c(70, 70, 71),
  `30` = c(71, 72, 75)
)
或者:
0 15 30
.1 70 71
.2 70 72
.3 71 75
我想要的是写一段代码将其翻译成如下格式:
# Desired long format: one row per (column key x, row key y) pair together
# with the cell value z found at that position of the cross table.
df_transformed <- data.frame(
  x = c(15, 15, 15, 30, 30, 30),
  y = c(.1, .2, .3, .1, .2, .3),
  z = c(70, 70, 71, 71, 72, 75)
)
或者:
x y z
15 .1 70
15 .2 70
15 .3 71
30 .1 71
30 .2 72
30 .3 75
这样 df_transformed
将描述每个可能的组合和附加值,就像您在交叉 table (df
) 中查找一样。即:如果df
的列等于15
,行等于.1
,则对应的值为70
。是否有捷径可寻?我尝试了 tidyr::gather()
,但它无法描述每个唯一的 z
值。
您可以使用 outer
对行和列的所有组合应用经 Vectorize 向量化的
FUN
函数,
# Look up a single cell of `df`; `y + 1` skips the leading row-key column.
# Vectorize() is needed because outer() calls FUN once with whole index vectors.
FUN <- Vectorize(function(x, y) df[x, y + 1])

# outer() yields an nrow x ncol matrix and as.vector() flattens it column by
# column, so the labels are expanded in that same order: every column name is
# repeated for all rows, and the row keys cycle once per column. (Relying on
# data.frame() recycling here would pair the labels with the wrong values.)
n_rows <- nrow(df)
n_cols <- ncol(df) - 1L
res <- data.frame(
  x = rep(colnames(df)[-1], each = n_rows),
  y = rep(as.character(df[[1]]), times = n_cols),
  z = as.vector(outer(seq_len(n_rows), seq_len(n_cols), FUN)),
  stringsAsFactors = FALSE
)
res
#     x   y  z
# 1 X15 0.1 70
# 2 X15 0.2 70
# 3 X15 0.3 71
# 4 X30 0.1 71
# 5 X30 0.2 72
# 6 X30 0.3 75
其中:
sapply(res, class)
#           x           y           z
# "character" "character"   "numeric"
如果您希望 x 和 y 是纯数字,请执行:
# "\\D" matches every non-digit character; the backslash has to be doubled
# inside an R string literal.
res <- transform(
  res,
  x = as.numeric(gsub("\\D", "", x)),
  y = as.numeric(y)
)
#    x   y  z
# 1 15 0.1 70
# 2 15 0.2 70
# 3 15 0.3 71
# 4 30 0.1 71
# 5 30 0.2 72
# 6 30 0.3 75
更一般地,分解矩阵m
C1 C2 C3 C4
R1 9086 9220 11985 469
R2 5627 6463 5447 258
R3 6561 1632 1773 72
R4 12468 12555 13409 1148
R5 21886 28320 22227 2502
R6 23231 22127 23402 3242
为其行坐标和列坐标(以及对应的值),我们可以这样做:
# Look up a single cell of the matrix `m` by row and column index.
fun <- Vectorize(function(x, y) m[x, y])

# as.vector() flattens the outer() result column by column, so row names cycle
# once per column while each column name is repeated for every row. Letting
# cbind() recycle colnames(m) would attach the wrong column label to most
# values.
res <- cbind(
  rep(rownames(m), times = ncol(m)),
  rep(colnames(m), each = nrow(m)),
  as.vector(outer(seq_len(nrow(m)), seq_len(ncol(m)), fun))
)
res
#       [,1] [,2] [,3]
#  [1,] "R1" "C1" "9086"
#  [2,] "R2" "C1" "5627"
#  [3,] "R3" "C1" "6561"
#  [4,] "R4" "C1" "12468"
#  [5,] "R5" "C1" "21886"
#  [6,] "R6" "C1" "23231"
#  [7,] "R1" "C2" "9220"
#  [8,] "R2" "C2" "6463"
#  [9,] "R3" "C2" "1632"
# [10,] "R4" "C2" "12555"
# [11,] "R5" "C2" "28320"
# [12,] "R6" "C2" "22127"
# [13,] "R1" "C3" "11985"
# [14,] "R2" "C3" "5447"
# [15,] "R3" "C3" "1773"
# [16,] "R4" "C3" "13409"
# [17,] "R5" "C3" "22227"
# [18,] "R6" "C3" "23402"
# [19,] "R1" "C4" "469"
# [20,] "R2" "C4" "258"
# [21,] "R3" "C4" "72"
# [22,] "R4" "C4" "1148"
# [23,] "R5" "C4" "2502"
# [24,] "R6" "C4" "3242"
数据
# Example 6 x 4 matrix with named rows (R1..R6) and columns (C1..C4).
# Values are listed column by column, which is how matrix() fills by default.
m <- matrix(
  c(
    9086, 5627, 6561, 12468, 21886, 23231,
    9220, 6463, 1632, 12555, 28320, 22127,
    11985, 5447, 1773, 13409, 22227, 23402,
    469, 258, 72, 1148, 2502, 3242
  ),
  nrow = 6L,
  ncol = 4L,
  dimnames = list(
    c("R1", "R2", "R3", "R4", "R5", "R6"),
    c("C1", "C2", "C3", "C4")
  )
)
使用 tidyverse
你可以:
library(tidyverse)
# Reshape the cross table to long format: the X15 / X30 headers become the
# numeric column `x`, their cells become `z`, and the row key X0 becomes `y`.
df %>%
  pivot_longer(
    cols = c(X15, X30),
    names_to = "x",
    values_to = "z",
    # Keep only the digits after the "X" prefix; the backslash must be
    # doubled inside an R string literal.
    names_pattern = "X(\\d+)",
    # names_transform performs the character -> numeric conversion;
    # names_ptypes only checks the type in tidyr >= 1.1 and errors here.
    names_transform = list(x = as.numeric)
  ) %>%
  rename(y = X0) %>%
  arrange(x, y) %>%
  select(x, y, z)
输出
# A tibble: 6 x 3
x y z
<dbl> <dbl> <dbl>
1 15 0.1 70
2 15 0.2 70
3 15 0.3 71
4 30 0.1 71
5 30 0.2 72
6 30 0.3 75
这是使用 stack
的基础 R 解决方案
# New names for the three columns produced below, in their original order:
# row key -> y, stacked values -> z, source column name -> x.
u <- c("y", "z", "x")
# stack() melts the value columns into (values, ind); binding the key column
# in front and indexing by order(u) returns the columns as x, y, z.
dfout <- setNames(cbind(df[1], stack(df[-1])), u)[order(u)]
这样
> dfout
x y z
1 X15 0.1 70
2 X15 0.2 70
3 X15 0.3 71
4 X30 0.1 71
5 X30 0.2 72
6 X30 0.3 75
使用gather()
:
library(tidyverse)
# data.frame() repaired the headers to X0 / X15 / X30, so the row-key column
# is X0 (a column literally named `0` does not exist in df).
df %>%
  # Collect every column except the row key into name/value pairs.
  gather(key = "x", value = "z", -X0) %>%
  # Drop the "X" prefix added by name repair so that x is numeric.
  mutate(x = as.numeric(sub("^X", "", x))) %>%
  select(x, y = X0, z) %>%
  arrange(x, y)
gather()
可能因为其棘手的参数规范而无法正常工作。这就是发明 pivot_longer()
的原因之一。
我知道上面有一个 pivot_longer()
答案利用了函数的所有特殊参数,但在您的情况下,这可能是一个更容易理解且同样有效的实现:
# Long-format version of the cross table using only the basic pivot_longer()
# arguments. The headers of df are X0 / X15 / X30 after data.frame() name
# repair, so the row key is referenced as X0.
df_transformed <-
  df %>%
  pivot_longer(cols = -X0, names_to = "x", values_to = "z") %>%
  # Drop the "X" prefix added by name repair so that x is numeric.
  mutate(x = as.numeric(sub("^X", "", x))) %>%
  select(x, y = X0, z) %>%
  arrange(x, y)
我正在尝试将一个交叉表(cross table)导入 R 中以供使用。该表如下所示:
# Cross table from the question: the first column holds the row keys, the
# remaining columns hold the values for each column key.
# data.frame() repairs the non-syntactic headers, so the resulting column
# names are X0, X15 and X30.
df <- data.frame(
  `0` = c(0.1, 0.2, 0.3),
  `15` = c(70, 70, 71),
  `30` = c(71, 72, 75)
)
或者:
0 15 30
.1 70 71
.2 70 72
.3 71 75
我想要的是写一段代码将其翻译成如下格式:
# Desired long format: one row per (column key x, row key y) pair together
# with the cell value z found at that position of the cross table.
df_transformed <- data.frame(
  x = c(15, 15, 15, 30, 30, 30),
  y = c(.1, .2, .3, .1, .2, .3),
  z = c(70, 70, 71, 71, 72, 75)
)
或者:
x y z
15 .1 70
15 .2 70
15 .3 71
30 .1 71
30 .2 72
30 .3 75
这样 df_transformed
将描述每个可能的组合和附加值,就像您在交叉 table (df
) 中查找一样。即:如果df
的列等于15
,行等于.1
,则对应的值为70
。是否有捷径可寻?我尝试了 tidyr::gather()
,但它无法描述每个唯一的 z
值。
您可以使用 outer
对行和列的所有组合应用经 Vectorize 向量化的
FUN
函数,
# Look up a single cell of `df`; `y + 1` skips the leading row-key column.
# Vectorize() is needed because outer() calls FUN once with whole index vectors.
FUN <- Vectorize(function(x, y) df[x, y + 1])

# outer() yields an nrow x ncol matrix and as.vector() flattens it column by
# column, so the labels are expanded in that same order: every column name is
# repeated for all rows, and the row keys cycle once per column. (Relying on
# data.frame() recycling here would pair the labels with the wrong values.)
n_rows <- nrow(df)
n_cols <- ncol(df) - 1L
res <- data.frame(
  x = rep(colnames(df)[-1], each = n_rows),
  y = rep(as.character(df[[1]]), times = n_cols),
  z = as.vector(outer(seq_len(n_rows), seq_len(n_cols), FUN)),
  stringsAsFactors = FALSE
)
res
#     x   y  z
# 1 X15 0.1 70
# 2 X15 0.2 70
# 3 X15 0.3 71
# 4 X30 0.1 71
# 5 X30 0.2 72
# 6 X30 0.3 75
其中:
sapply(res, class)
#           x           y           z
# "character" "character"   "numeric"
如果您希望 x 和 y 是纯数字,请执行:
# "\\D" matches every non-digit character; the backslash has to be doubled
# inside an R string literal.
res <- transform(
  res,
  x = as.numeric(gsub("\\D", "", x)),
  y = as.numeric(y)
)
#    x   y  z
# 1 15 0.1 70
# 2 15 0.2 70
# 3 15 0.3 71
# 4 30 0.1 71
# 5 30 0.2 72
# 6 30 0.3 75
更一般地,分解矩阵m
C1 C2 C3 C4
R1 9086 9220 11985 469
R2 5627 6463 5447 258
R3 6561 1632 1773 72
R4 12468 12555 13409 1148
R5 21886 28320 22227 2502
R6 23231 22127 23402 3242
为其行坐标和列坐标(以及对应的值),我们可以这样做:
# Look up a single cell of the matrix `m` by row and column index.
fun <- Vectorize(function(x, y) m[x, y])

# as.vector() flattens the outer() result column by column, so row names cycle
# once per column while each column name is repeated for every row. Letting
# cbind() recycle colnames(m) would attach the wrong column label to most
# values.
res <- cbind(
  rep(rownames(m), times = ncol(m)),
  rep(colnames(m), each = nrow(m)),
  as.vector(outer(seq_len(nrow(m)), seq_len(ncol(m)), fun))
)
res
#       [,1] [,2] [,3]
#  [1,] "R1" "C1" "9086"
#  [2,] "R2" "C1" "5627"
#  [3,] "R3" "C1" "6561"
#  [4,] "R4" "C1" "12468"
#  [5,] "R5" "C1" "21886"
#  [6,] "R6" "C1" "23231"
#  [7,] "R1" "C2" "9220"
#  [8,] "R2" "C2" "6463"
#  [9,] "R3" "C2" "1632"
# [10,] "R4" "C2" "12555"
# [11,] "R5" "C2" "28320"
# [12,] "R6" "C2" "22127"
# [13,] "R1" "C3" "11985"
# [14,] "R2" "C3" "5447"
# [15,] "R3" "C3" "1773"
# [16,] "R4" "C3" "13409"
# [17,] "R5" "C3" "22227"
# [18,] "R6" "C3" "23402"
# [19,] "R1" "C4" "469"
# [20,] "R2" "C4" "258"
# [21,] "R3" "C4" "72"
# [22,] "R4" "C4" "1148"
# [23,] "R5" "C4" "2502"
# [24,] "R6" "C4" "3242"
数据
# Example 6 x 4 matrix with named rows (R1..R6) and columns (C1..C4).
# Values are listed column by column, which is how matrix() fills by default.
m <- matrix(
  c(
    9086, 5627, 6561, 12468, 21886, 23231,
    9220, 6463, 1632, 12555, 28320, 22127,
    11985, 5447, 1773, 13409, 22227, 23402,
    469, 258, 72, 1148, 2502, 3242
  ),
  nrow = 6L,
  ncol = 4L,
  dimnames = list(
    c("R1", "R2", "R3", "R4", "R5", "R6"),
    c("C1", "C2", "C3", "C4")
  )
)
使用 tidyverse
你可以:
library(tidyverse)
# Reshape the cross table to long format: the X15 / X30 headers become the
# numeric column `x`, their cells become `z`, and the row key X0 becomes `y`.
df %>%
  pivot_longer(
    cols = c(X15, X30),
    names_to = "x",
    values_to = "z",
    # Keep only the digits after the "X" prefix; the backslash must be
    # doubled inside an R string literal.
    names_pattern = "X(\\d+)",
    # names_transform performs the character -> numeric conversion;
    # names_ptypes only checks the type in tidyr >= 1.1 and errors here.
    names_transform = list(x = as.numeric)
  ) %>%
  rename(y = X0) %>%
  arrange(x, y) %>%
  select(x, y, z)
输出
# A tibble: 6 x 3
x y z
<dbl> <dbl> <dbl>
1 15 0.1 70
2 15 0.2 70
3 15 0.3 71
4 30 0.1 71
5 30 0.2 72
6 30 0.3 75
这是使用 stack 的基础 R 解决方案
# New names for the three columns produced below, in their original order:
# row key -> y, stacked values -> z, source column name -> x.
u <- c("y", "z", "x")
# stack() melts the value columns into (values, ind); binding the key column
# in front and indexing by order(u) returns the columns as x, y, z.
dfout <- setNames(cbind(df[1], stack(df[-1])), u)[order(u)]
这样
> dfout
x y z
1 X15 0.1 70
2 X15 0.2 70
3 X15 0.3 71
4 X30 0.1 71
5 X30 0.2 72
6 X30 0.3 75
使用gather()
:
library(tidyverse)
# data.frame() repaired the headers to X0 / X15 / X30, so the row-key column
# is X0 (a column literally named `0` does not exist in df).
df %>%
  # Collect every column except the row key into name/value pairs.
  gather(key = "x", value = "z", -X0) %>%
  # Drop the "X" prefix added by name repair so that x is numeric.
  mutate(x = as.numeric(sub("^X", "", x))) %>%
  select(x, y = X0, z) %>%
  arrange(x, y)
gather()
可能因为其棘手的参数规范而无法正常工作。这就是发明 pivot_longer()
的原因之一。
我知道上面有一个 pivot_longer()
答案利用了函数的所有特殊参数,但在您的情况下,这可能是一个更容易理解且同样有效的实现:
# Long-format version of the cross table using only the basic pivot_longer()
# arguments. The headers of df are X0 / X15 / X30 after data.frame() name
# repair, so the row key is referenced as X0.
df_transformed <-
  df %>%
  pivot_longer(cols = -X0, names_to = "x", values_to = "z") %>%
  # Drop the "X" prefix added by name repair so that x is numeric.
  mutate(x = as.numeric(sub("^X", "", x))) %>%
  select(x, y = X0, z) %>%
  arrange(x, y)