将数据从宽格式重塑为长格式

Reshape from wide to long format/structure

我有一个包含 302 个观测值、942 个变量的数据框（302 obs. of 942 variables），看起来像这样:

            [Actr225009] [Actr225423] [Actr229853] [Actr78542]
[Actr225009]      0            NA           3            NA 
[Actr225423]      NA           0            1            5
[Actr229853]      8            NA           0            2
...

我需要把它重组成长格式，使每个（行名，列名）组合及其取值各占一行:

 Actr225009 Actr225009 0
 Actr225009 Actr225423 NA
 Actr225009 Actr229853 3
 Actr225009 Actr78542  NA ...

我运行了这段代码:

# Reshape `test` from wide to long: one output row per cell, holding the row
# label, the column label and the cell value, all as character strings.
# Labels are read from the first column (row labels) and the first row
# (column labels) of `test`, so both index ranges start at 2.
#
# The result is assembled once at the end instead of calling rbind() for
# every cell: growing a data frame cell by cell copies the whole accumulated
# result on each append, which is what made the original run for hours.
n_rows <- nrow(test)
n_cols <- ncol(test)
row_idx <- seq_len(n_rows)[-1]  # empty (not c(2, 1)) when there is 1 row
col_idx <- seq_len(n_cols)[-1]

# Row and column labels do not depend on the loop, so read them once.
row_labels <- as.character(test[row_idx, 1])
col_labels <- vapply(
  test[1, col_idx, drop = FALSE], as.character, character(1),
  USE.NAMES = FALSE
)

# One character vector of cell values per data row (preallocated list).
values_by_row <- vector("list", length(row_idx))
pb <- txtProgressBar(min = 0, max = max(n_rows, 1L), style = 3)
for (k in seq_along(row_idx)) {
  i <- row_idx[k]
  # Convert each cell on its own so factor cells yield their labels,
  # not their integer codes.
  values_by_row[[k]] <- vapply(
    test[i, col_idx, drop = FALSE], as.character, character(1),
    USE.NAMES = FALSE
  )
  setTxtProgressBar(pb, i)
}
close(pb)

# Row-major order: all columns of row 2, then all columns of row 3, ...
dist.acteurs <- data.frame(
  V1 = rep(row_labels, each = length(col_idx)),
  V2 = rep(col_labels, times = length(row_idx)),
  V3 = as.character(unlist(values_by_row, use.names = FALSE)),
  stringsAsFactors = FALSE
)

但是运行需要一整天的时间,我想知道是否可以使用 lapply 或其他更高效的方法。

编辑:

>dput(test[1:5,1:5])


structure(list(X = structure(1:5, .Label = c("Actr22511", "Actr28440", 
"Actr28464", "Actr28604", "Actr30119", "Actr30817", "Actr30819", 
"Actr30821", "Actr30822", "Actr30934", "Actr31331", "Actr31332", 
"Actr31349", "Actr31369", "Actr32128", "Actr32178", "Actr32190", 
"Actr32207", "Actr32208", "Actr32209", "Actr32223", "Actr32232", 
"Actr32233", "Actr32234", "Actr32265", "Actr32273", "Actr32274", 
"Actr32275", "Actr32510", "Actr32533", "Actr32534", "Actr32796", 
"Actr32801", "Actr32803", "Actr32881", "Actr33774", "Actr33776", 
"Actr33778", "Actr33842", "Actr33843", "Actr33844", "Actr3603", 
"Actr40014", "Actr40020", "Actr42312", "Actr43671", "Actr43766", 
"Actr44240", "Actr44241", "Actr44247", "Actr44251", "Actr44252", 
"Actr44254", "Actr44255", "Actr44258", "Actr44259", "Actr46745", 
"Actr47687", "Actr47715", "Actr47717", "Actr47718", "Actr47720", 
"Actr47780", "Actr47783", "Actr47785", "Actr47787", "Actr47790", 
"Actr47793", "Actr47796", "Actr47797", "Actr47803", "Actr47807", 
"Actr47817", "Actr47818", "Actr47822", "Actr47823", "Actr47824", 
"Actr47826", "Actr47828", "Actr47829", "Actr48681", "Actr48789", 
"Actr48806", "Actr48809", "Actr48810", "Actr48811", "Actr48813", 
"Actr48815", "Actr48861", "Actr48978", "Actr48979", "Actr48981", 
"Actr48982", "Actr49053", "Actr49058", "Actr49098", "Actr49101", 
"Actr49110", "Actr49116", "Actr49119", "Actr49120", "Actr49186", 
"Actr49188", "Actr49189", "Actr49191", "Actr49192", "Actr49196", 
"Actr49197", "Actr49199", "Actr49200", "Actr49201", "Actr49202", 
"Actr49222", "Actr49224", "Actr49231", "Actr49232", "Actr49234", 
"Actr49235", "Actr49236", "Actr49266", "Actr49267", "Actr49271", 
"Actr49280", "Actr49424", "Actr49434", "Actr49435", "Actr49436", 
"Actr49449", "Actr49452", "Actr49562", "Actr49564", "Actr49567", 
"Actr49572", "Actr49573", "Actr49574", "Actr49606", "Actr49608", 
"Actr49609", "Actr49611", "Actr49612", "Actr49614", "Actr49615", 
"Actr49631", "Actr49634", "Actr49638", "Actr49639", "Actr49644", 
"Actr49646", "Actr49649", "Actr49650", "Actr49651", "Actr49652", 
"Actr49656", "Actr49658", "Actr49662", "Actr49667", "Actr49668", 
"Actr49670", "Actr49672", "Actr49784", "Actr49786", "Actr49787", 
"Actr49789", "Actr49794", "Actr49796", "Actr49808", "Actr49810", 
"Actr49812", "Actr49815", "Actr49822", "Actr49828", "Actr49838", 
"Actr49839", "Actr49840", "Actr49844", "Actr49846", "Actr49847", 
"Actr49851", "Actr49852", "Actr49853", "Actr49854", "Actr49858", 
"Actr49860", "Actr49863", "Actr49864", "Actr49866", "Actr49869", 
"Actr49870", "Actr49871", "Actr49875", "Actr49876", "Actr49877", 
"Actr49878", "Actr49879", "Actr49882", "Actr49883", "Actr49884", 
"Actr49885", "Actr49886", "Actr49888", "Actr49889", "Actr49892", 
"Actr49893", "Actr49894", "Actr49895", "Actr49896", "Actr49897", 
"Actr49898", "Actr49899", "Actr49900", "Actr49901", "Actr50120", 
"Actr50122", "Actr50123", "Actr50125", "Actr50126", "Actr50129", 
"Actr50130", "Actr50131", "Actr50133", "Actr50134", "Actr50135", 
"Actr50137", "Actr50138", "Actr50143", "Actr50148", "Actr50149", 
"Actr50151", "Actr50152", "Actr50154", "Actr50155", "Actr50156", 
"Actr50173", "Actr50175", "Actr50182", "Actr50184", "Actr50187", 
"Actr50188", "Actr50191", "Actr50192", "Actr50194", "Actr50195", 
"Actr50200", "Actr50202", "Actr50203", "Actr50204", "Actr50206", 
"Actr50209", "Actr50211", "Actr50212", "Actr50219", "Actr50231", 
"Actr50232", "Actr50239", "Actr50240", "Actr50241", "Actr50243", 
"Actr50246", "Actr50247", "Actr50249", "Actr50255", "Actr50256", 
"Actr50258", "Actr50263", "Actr50265", "Actr50272", "Actr50275", 
"Actr50277", "Actr50279", "Actr50281", "Actr50283", "Actr50284", 
"Actr50285", "Actr50286", "Actr50287", "Actr50288", "Actr50289", 
"Actr50290", "Actr50291", "Actr50292", "Actr50293", "Actr50294", 
"Actr50298", "Actr50552", "Actr50556", "Actr50558", "Actr50559", 
"Actr50562", "Actr50568", "Actr50605", "Actr50608", "Actr50610", 
"Actr50625", "Actr50627", "Actr50630", "Actr50631", "Actr50759", 
"Actr50776", "Actr50778"), class = "factor"), Actr22509 = c("1", 
"NA", "NA", "NA", "NA"), Actr22510 = c("1", "NA", "NA", "NA", 
"NA"), Actr22511 = c("0", "NA", "NA", "NA", "NA"), Actr22955 = c("NA", 
"NA", "1", "NA", "NA")), .Names = c("X", "Actr22509", "Actr22510", 
"Actr22511", "Actr22955"), row.names = c(NA, 5L), class = "data.frame")

我想为这个问题找一个合适的、较新的重复问题(duplicate),但没有找到任何好的(可能是因为相关问题的标题信息量不足),所以这里给出处理这种情况的 3 种常见方法。

Base R:使用 reshape。这是一个相当繁琐的方案,由于性能和复杂性方面的原因,在这种情况下通常不推荐使用。我还建议之后用 row.names(Res) <- NULL 删除行名称(这里假设 Res 是保存 reshape 结果的变量)。

# Base R wide-to-long reshape: every column except the id column "X" (assumed
# to be the first column) is stacked into a single "value" column, and
# "variable" records which source column each value came from.
# `varying` and `times` are both derived from names(df)[-1] so they always
# have the same length, whatever the width of `df` (a fixed 2:5 only matches
# a 5-column data frame).
reshape(df,
        idvar = "X",
        varying = list(names(df)[-1]),
        v.names = "value",
        timevar = "variable",
        times = names(df)[-1],
        direction = "long")

#                             X  variable value
# Actr22511.Actr22509 Actr22511 Actr22509     1
# Actr28440.Actr22509 Actr28440 Actr22509    NA
# Actr28464.Actr22509 Actr28464 Actr22509    NA
# Actr28604.Actr22509 Actr28604 Actr22509    NA
# Actr30119.Actr22509 Actr30119 Actr22509    NA
...

reshape2 方法

library(reshape2)
# Keep "X" as the identifier column; every other column is stacked into
# variable/value pairs.
melt(df, id.vars = "X")
#            X  variable value
# 1  Actr22511 Actr22509     1
# 2  Actr28440 Actr22509    NA
# 3  Actr28464 Actr22509    NA
# 4  Actr28604 Actr22509    NA
...

以及最近很热门的 tidyr 方法(有人可以向我解释一下,它与 melt 相比究竟在哪些方面更简单/更好吗?)

library(tidyr)
# Stack every column except X: source column names go to "variable",
# cell contents go to "value".
gather(df, key = "variable", value = "value", -X)
#            X  variable value
# 1  Actr22511 Actr22509     1
# 2  Actr28440 Actr22509    NA
# 3  Actr28464 Actr22509    NA
# 4  Actr28604 Actr22509    NA
...