带编码的内连接

Inner join with encoding

我有两个表，想在它们之间做内连接（inner join）。

library(dplyr)
        
# Table1: one row per ID together with its Sales value.
Table1 <- structure(
  list(
    ID = c(1, 2, 3, 4, 5),
    Sales = c(100, 200, 300, 400, 500)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Table2: possibly several rows per ID, each naming one Store for that ID.
Table2 <- structure(
  list(
    ID = c(1, 1, 2, 3, 3, 3, 4, 5, 5, 5),
    Store = c(1, 2, 3, 4, 5, 6, 4, 10, 15, 20)
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

  # Keep only the rows whose ID is present in both tables.
  INNER_JOIN_TABLE <- inner_join(x = Table1, y = Table2, by = "ID")

我已经用上面的命令完成了内连接，但这个命令只能得到普通的内连接结果，而我想要的是另一种形式的结果，比如下面这张表。

那么，有谁能帮我解决这个问题，得到像上图那样的表吗？

这将为您提供所需的结果:

library(reshape2)
# Reshape Table2 to wide form: one row per ID, one column per Store value.
# value.var is named explicitly so dcast() does not have to guess the value
# column; ID/Store combinations absent from Table2 are filled with "".
dcast_Table2 <- dcast(
  Table2,
  formula = ID ~ Store,
  value.var = "Store",
  fill = ""
)

# In every column except the first (ID), flag each non-empty cell with 1.
dcast_Table2[-1][dcast_Table2[-1] != ""] <- 1

# Attach the per-store flag columns to Table1 by ID, then print the result.
INNER_JOIN_TABLE <- inner_join(Table1, dcast_Table2, by = "ID")
INNER_JOIN_TABLE

输出:

> INNER_JOIN_TABLE
# A tibble: 5 x 11
     ID Sales `1`   `2`   `3`   `4`   `5`   `6`   `10`  `15`  `20` 
  <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1     1   100 "1"   "1"   ""    ""    ""    ""    ""    ""    ""   
2     2   200 ""    ""    "1"   ""    ""    ""    ""    ""    ""   
3     3   300 ""    ""    ""    "1"   "1"   "1"   ""    ""    ""   
4     4   400 ""    ""    ""    "1"   ""    ""    ""    ""    ""   
5     5   500 ""    ""    ""    ""    ""    ""    "1"   "1"   "1" 

将空值更改为 0:

library(reshape2)
# Reshape Table2 to wide form: one row per ID, one column per Store value.
# value.var is named explicitly so dcast() does not have to guess the value
# column; ID/Store combinations absent from Table2 are filled with "".
dcast_Table2 <- dcast(
  Table2,
  formula = ID ~ Store,
  value.var = "Store",
  fill = ""
)

# In every column except the first (ID): flag non-empty cells with 1 first,
# then turn the remaining empty cells into 0. The order matters, because the
# second step relies on the filled cells no longer being "".
dcast_Table2[-1][dcast_Table2[-1] != ""] <- 1
dcast_Table2[-1][dcast_Table2[-1] == ""] <- 0

# Attach the per-store flag columns to Table1 by ID, then print the result.
INNER_JOIN_TABLE <- inner_join(Table1, dcast_Table2, by = "ID")
INNER_JOIN_TABLE

输出:

> INNER_JOIN_TABLE
# A tibble: 5 x 11
     ID Sales `1`   `2`   `3`   `4`   `5`   `6`   `10`  `15`  `20` 
  <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1     1   100 1     1     0     0     0     0     0     0     0    
2     2   200 0     0     1     0     0     0     0     0     0    
3     3   300 0     0     0     1     1     1     0     0     0    
4     4   400 0     0     0     1     0     0     0     0     0    
5     5   500 0     0     0     0     0     0     1     1     1    

要去掉引号，只需将该表作为 data.frame 而不是 tibble 打印。

# Convert the joined tibble to a plain data.frame; evaluated at top level, the
# result is auto-printed in data.frame format instead of tibble format.
as.data.frame(INNER_JOIN_TABLE)

输出:

> as.data.frame(INNER_JOIN_TABLE)
  ID Sales 1 2 3 4 5 6 10 15 20
1  1   100 1 1 0 0 0 0  0  0  0
2  2   200 0 0 1 0 0 0  0  0  0
3  3   300 0 0 0 1 1 1  0  0  0
4  4   400 0 0 0 1 0 0  0  0  0
5  5   500 0 0 0 0 0 0  1  1  1