从长到宽重塑数据框

Reshaping dataframe from long to wide

我有以下 data.frame:

# Sample data in long format: one row per (transaction, user) pair.
t1 <- data.frame(
  t_id  = c("61", "61", "61", "62", "62", "63"),
  u_id  = c("84", "85", "86", "84", "87", "88"),
  type  = c("d", "d", "d", "s", "s", "d"),
  v1    = c(0.25, 0.25, 0.25, 0.35, 0.35, 0.45),
  v2    = c(0.30, 0.30, 0.40, 0.50, 0.50, 1.00),
  tdate = as.Date(c(
    "2015-11-01", "2015-11-02", "2015-11-03",
    "2015-10-01", "2015-10-02", "2015-09-01"
  ))
)

这是它的样子:

  t_id u_id type   v1  v2      tdate
1   61   84    d 0.25 0.3 2015-11-01
2   61   85    d 0.25 0.3 2015-11-02
3   61   86    d 0.25 0.4 2015-11-03
4   62   84    s 0.35 0.5 2015-10-01
5   62   87    s 0.35 0.5 2015-10-02
6   63   88    d 0.45 1.0 2015-09-01

t_id 是 transaction_id,u_id 是 user_id。我希望输出按 t_id 和 u_id 分组,并给各列的值加上用户前缀:第一个用户的列以 u1_ 为前缀,第二个用户的列以 u2_ 为前缀,依此类推。假设每笔交易不会超过 3 个用户。

输出应如下所示:

t_id u1_id u1_type u1_v1 u1_v2 u1_tdate    u2_id u2_type u2_v1 u2_v2 u2_tdate    u3_id u3_type u3_v1 u3_v2 u3_tdate
 61    84     d    0.25  0.3   2015-11-01    85     d    0.25  0.3   2015-11-02    86     d    0.25  0.4   2015-11-03    
 62    84     s    0.35  0.5   2015-10-01    87     s    0.35  0.5   2015-10-02   
 63    88     d    0.45  1.0   2015-09-01    

我试过 reshape 但无济于事。关于我应该如何去做有什么想法吗?

您不能创建这样的 data.frame(即每行的列数不同)。但是您可以创建一个列表。 这非常接近:

# Build one single-row, wide data frame per transaction (t_id).
#
# Returns a list named by t_id. In each element every column except the
# first (assumed to be t_id) is renamed with the prefix "u<k>_", where k is
# the position of the user's row within that transaction, and the rows of
# the transaction are then joined side by side on t_id.
lapply(split(t1, t1$t_id), function(x) {
  # Prefix all non-key columns of `df` with "u<k>_".
  add_prefix <- function(df, k) {
    names(df)[-1] <- paste0("u", k, "_", names(df)[-1])
    df
  }

  # Zero or one row: nothing to join, only the column names change.
  if (nrow(x) < 2L) {
    return(add_prefix(x, 1L))
  }

  # The row position is passed explicitly to the renaming step, so no
  # counter has to be mutated from inside the Reduce() callback.
  per_user <- lapply(seq_len(nrow(x)), function(k) {
    add_prefix(x[k, , drop = FALSE], k)
  })

  # Fold the per-user rows into a single row keyed by t_id.
  Reduce(function(acc, nxt) merge(acc, nxt, by = "t_id"), per_user)
})

哦,是的,顺便说一句:我强烈支持@SabDeM 的评论。 ;)

我认为你能做的最好办法是进行一系列 cast(reshape2 的 dcast)操作:

library(reshape2)

# Sample data in long format: one row per (transaction, user) pair.
# Strings are kept as character vectors rather than factors.
t1 <- data.frame(
  t_id  = c("61", "61", "61", "62", "62", "63"),
  u_id  = c("84", "85", "86", "84", "87", "88"),
  type  = c("d", "d", "d", "s", "s", "d"),
  v1    = c(0.25, 0.25, 0.25, 0.35, 0.35, 0.45),
  v2    = c(0.30, 0.30, 0.40, 0.50, 0.50, 1.00),
  tdate = as.Date(c(
    "2015-11-01", "2015-11-02", "2015-11-03",
    "2015-10-01", "2015-10-02", "2015-09-01"
  )),
  stringsAsFactors = FALSE
)

# Reshape t1 from long to wide: one row per t_id, with each user's values in
# columns prefixed "u1_", "u2_", ... according to the user's position within
# the transaction. Expects `t1` to exist with t_id as its first column and
# reshape2 to be attached (for dcast).

# Columns to spread: everything except the transaction id.
vars <- names(t1)[-1]

# Per-row label "u<k>_", k being the row's position within its t_id group.
# The position is computed on integer indices, so this does not depend on
# t_id being character rather than factor.
t1$seq <- paste0("u", ave(seq_along(t1$t_id), t1$t_id, FUN = seq_along), "_")

# Start from the distinct transaction ids and attach one cast per variable.
out <- data.frame(t_id = unique(t1$t_id))

for (i in vars) {
  # One column per user label, holding the values of variable `i`.
  temp <- dcast(t1, t_id ~ seq, value.var = i)
  names(temp)[-1] <- paste0(names(temp)[-1], i)

  # Restore the Date class after casting (the cast result is presumably
  # plain numbers for Date input -- hence the explicit conversion). An
  # explicit origin keeps this working on R versions where as.Date() on a
  # number requires one. List-style indexing (temp[-1]) keeps a data frame
  # even when there is only a single value column.
  if (inherits(t1[[i]], "Date")) {
    temp[-1] <- lapply(temp[-1], as.Date, origin = "1970-01-01")
  }

  out <- merge(out, temp, by = "t_id")
}

# Order columns alphabetically so each user's columns sit together
# ("t_id" sorts ahead of the "u<k>_" columns).
out <- out[sort(names(out))]