在 R 中导入交叉表

Importing cross-table in R

我正在尝试将一个交叉表导入 R 中使用。该表如下所示:

# Example cross-table: the first column holds the row keys (y), the remaining
# columns hold the cell values for the headers 15 and 30.
# check.names = TRUE is the data.frame() default and turns the non-syntactic
# names 0 / 15 / 30 into X0 / X15 / X30.
df <- data.frame(
  `0` = c(0.1, 0.2, 0.3),
  `15` = c(70, 70, 71),
  `30` = c(71, 72, 75),
  check.names = TRUE
)

或者:

0    15   30
.1   70   71
.2   70   72
.3   71   75

我想要的是写一段代码将其翻译成如下格式:

# Desired long format: one row per (column header x, row key y) pair of the
# cross-table, with z holding the cell value found at that position.
df_transformed <- data.frame(
  x = c(15, 15, 15, 30, 30, 30),
  y = c(.1, .2, .3, .1, .2, .3),
  z = c(70, 70, 71, 71, 72, 75)
)

或者:

x   y   z
15  .1  70
15  .2  70
15  .3  71
30  .1  71
30  .2  72
30  .3  75

这样 df_transformed 就列出了每一种可能的行列组合及其对应的值,就像在交叉表 (df) 中查表一样。例如:如果 df 的列为 15、行为 .1,则对应的值为 70。有没有简便的方法可以做到?我尝试过 tidyr::gather(),但结果无法正确描述每一个 z 值。

您可以使用 outer,对行索引与列索引的所有组合应用经过 Vectorize 向量化的函数 FUN:

# Body of the cross-table as a matrix: row names carry the y keys (first
# column of df), column names carry the x labels ("X15", "X30").
m <- as.matrix(df[-1])
rownames(m) <- df[[1]]

# Look up one cell of df; "+ 1" skips the first column, which holds the y keys.
# Vectorize() lets outer() call it on whole index vectors.
FUN <- Vectorize(function(x, y) df[x, y + 1])
# outer() yields the cells in column-major order, so each column label must be
# repeated once per row and the row labels repeated once per column; passing
# the bare dimnames would recycle them and pair labels with the wrong values.
res <- data.frame(x = rep(colnames(m), each = nrow(m)),
                  y = rep(rownames(m), times = ncol(m)),
                  z = as.vector(outer(seq_len(nrow(m)), seq_len(ncol(m)), FUN)),
                  stringsAsFactors = FALSE)
res
#     x   y  z
# 1 X15 0.1 70
# 2 X15 0.2 70
# 3 X15 0.3 71
# 4 X30 0.1 71
# 5 X30 0.2 72
# 6 X30 0.3 75

其中:

sapply(res, class)
#           x           y           z 
# "character" "character"   "numeric" 

如果您希望 x 和 y 是纯数字,请执行:

# Strip every non-digit from the column labels ("X15" -> "15") and convert
# both label columns to numeric. The backslash must be doubled inside an R
# string literal.
res <- transform(res,
          x = as.numeric(gsub("\\D", "", x)),
          y = as.numeric(y)
          )
#    x   y  z
# 1 15 0.1 70
# 2 15 0.2 70
# 3 15 0.3 71
# 4 30 0.1 71
# 5 30 0.2 72
# 6 30 0.3 75

更一般地,对于下面的矩阵 m:

      C1    C2    C3   C4
R1  9086  9220 11985  469
R2  5627  6463  5447  258
R3  6561  1632  1773   72
R4 12468 12555 13409 1148
R5 21886 28320 22227 2502
R6 23231 22127 23402 3242

要将其分解为行坐标、列坐标和对应的值,可以这样做:

# Look up a single cell of m; Vectorize() lets outer() call it on index vectors.
fun <- Vectorize(function(x, y) m[x, y])
# outer() returns the cells in column-major order, so the row labels are
# repeated as a whole once per column and each column label is repeated once
# per row. Binding the bare dimnames would recycle them and mislabel cells.
res <- cbind(rep(rownames(m), times = ncol(m)),
             rep(colnames(m), each = nrow(m)),
             as.vector(outer(seq_len(nrow(m)), seq_len(ncol(m)), fun)))

res
#       [,1] [,2] [,3]   
#  [1,] "R1" "C1" "9086" 
#  [2,] "R2" "C1" "5627" 
#  [3,] "R3" "C1" "6561" 
#  [4,] "R4" "C1" "12468"
#  [5,] "R5" "C1" "21886"
#  [6,] "R6" "C1" "23231"
#  [7,] "R1" "C2" "9220" 
#  [8,] "R2" "C2" "6463" 
#  [9,] "R3" "C2" "1632" 
# [10,] "R4" "C2" "12555"
# [11,] "R5" "C2" "28320"
# [12,] "R6" "C2" "22127"
# [13,] "R1" "C3" "11985"
# [14,] "R2" "C3" "5447" 
# [15,] "R3" "C3" "1773" 
# [16,] "R4" "C3" "13409"
# [17,] "R5" "C3" "22227"
# [18,] "R6" "C3" "23402"
# [19,] "R1" "C4" "469"  
# [20,] "R2" "C4" "258"  
# [21,] "R3" "C4" "72"   
# [22,] "R4" "C4" "1148" 
# [23,] "R5" "C4" "2502" 
# [24,] "R6" "C4" "3242" 

数据

# Example 6 x 4 numeric matrix with rows R1..R6 and columns C1..C4.
# Values are listed column by column (matrix() fills column-major by default).
m <- matrix(
  c(
    9086, 5627, 6561, 12468, 21886, 23231,   # C1
    9220, 6463, 1632, 12555, 28320, 22127,   # C2
    11985, 5447, 1773, 13409, 22227, 23402,  # C3
    469, 258, 72, 1148, 2502, 3242           # C4
  ),
  nrow = 6L,
  ncol = 4L,
  dimnames = list(
    c("R1", "R2", "R3", "R4", "R5", "R6"),
    c("C1", "C2", "C3", "C4")
  )
)

使用 tidyverse 你可以:

library(tidyverse)

# Reshape the value columns to long format. names_pattern keeps only the
# digits after the "X" prefix (the backslash must be doubled in an R string),
# and names_transform converts them to numeric. names_ptypes only checks the
# type in tidyr >= 1.1.0 and would fail on the character names.
df %>%
  pivot_longer(
    cols = c(X15, X30),
    names_to = "x",
    values_to = "z",
    names_pattern = "X(\\d+)",
    names_transform = list(x = as.numeric)
  ) %>%
  rename(y = X0) %>%
  arrange(x, y) %>%
  select(x, y, z)

输出

# A tibble: 6 x 3
      x     y     z
  <dbl> <dbl> <dbl>
1    15   0.1    70
2    15   0.2    70
3    15   0.3    71
4    30   0.1    71
5    30   0.2    72
6    30   0.3    75

这是使用 stack 的基础 R 解决方案:

# stack() turns the value columns into a (values, ind) pair; binding the key
# column in front gives the columns in the order y, z, x.
u <- c("y", "z", "x")
dfout <- setNames(cbind(df[1], stack(df[-1])), u)
# order(u) sorts the names alphabetically, which reorders the columns to x, y, z.
dfout <- dfout[order(u)]

这样

> dfout
    x   y  z
1 X15 0.1 70
2 X15 0.2 70
3 X15 0.3 71
4 X30 0.1 71
5 X30 0.2 72
6 X30 0.3 75

使用gather()

library(tidyverse)

# data.frame() renamed the columns to X0 / X15 / X30, so the key column must be
# excluded as X0 (a column named `0` does not exist). The key column receives
# the former column names and the value column the cell values.
df %>%
  gather(key = "x", value = "z", -X0) %>%
  # Drop the "X" prefix so x holds the numeric header (15, 30).
  mutate(x = as.numeric(sub("^X", "", x))) %>%
  select(x, y = X0, z) %>%
  arrange(x, y)

gather() 的参数写法比较晦涩、容易用错,这很可能就是它没有按预期工作的原因,也是后来引入 pivot_longer() 的原因之一。

我知道上面有一个 pivot_longer() 答案利用了函数的所有特殊参数,但在您的情况下,这可能是一个更容易理解且同样有效的实现:

# Reshape every column except the key column X0 into (x, z) pairs.
# data.frame() renamed the columns to X0 / X15 / X30, so a column named `0`
# does not exist and the key column must be referenced as X0.
df_transformed <-
  df %>%
  pivot_longer(cols = -X0, names_to = "x", values_to = "z") %>%
  # Drop the "X" prefix so x holds the numeric header (15, 30).
  mutate(x = as.numeric(sub("^X", "", x))) %>%
  select(x, y = X0, z) %>%
  arrange(x, y)