将数据从宽格式(结构)重塑为长格式(结构)
Reshape from wide to long format/structure
我有一个 302 obs. of 942 variables
的数据框。看起来像这样:
[Actr225009] [Actr225423] [Actr229853] [Actr78542]
[Actr225009] 0 NA 3 NA
[Actr225423] NA 0 1 5
[Actr229853] 8 NA 0 2
...
并且需要为每一行重组它:
Actr225009 Actr225009 0
Actr225009 Actr225423 NA
Actr225009 Actr229853 3
Actr225009 Actr78542 NA ...
我运行了这段代码:
# Build the long (row label, column label, value) table one cell at a time.
# NOTE: this is the slow version being asked about: `dist.acteurs` starts
# empty and is grown with rbind() once per cell, so the accumulated result
# is copied again on every iteration.
dist.acteurs<-data.frame()
# Progress bar whose maximum is the number of rows of `test`.
pb <- txtProgressBar(min = 0, max = length(test[,1]), style = 3)
for(i in 2: length(test[,1])){# iterate over the rows (starts at 2, so row 1 is not emitted)
for(j in 2: length(test[1,])){# iterate over the columns (starts at 2, so column 1 is not emitted)
# One output record: the label in column 1 of row i, the label read from
# row 1 of column j, and the cell value, all coerced to character.
# NOTE(review): assumes row 1 of `test` holds the column labels; confirm.
tps<-c(as.character(test[i,1]),as.character(test[1,j]),as.character(test[i,j]))
# Turn the 3-element vector into a 1 x 3 row so it can be row-bound.
tps<-t(as.data.frame(tps))
dist.acteurs<-rbind(dist.acteurs,tps)
}
# Advance the progress bar once per completed row.
setTxtProgressBar(pb, i)
}
close(pb)
但是运行它需要一整天的时间,我想知道是否可以使用 lapply 或其他更高效的方法。
编辑:
>dput(test[1:5,1:5])
structure(list(X = structure(1:5, .Label = c("Actr22511", "Actr28440",
"Actr28464", "Actr28604", "Actr30119", "Actr30817", "Actr30819",
"Actr30821", "Actr30822", "Actr30934", "Actr31331", "Actr31332",
"Actr31349", "Actr31369", "Actr32128", "Actr32178", "Actr32190",
"Actr32207", "Actr32208", "Actr32209", "Actr32223", "Actr32232",
"Actr32233", "Actr32234", "Actr32265", "Actr32273", "Actr32274",
"Actr32275", "Actr32510", "Actr32533", "Actr32534", "Actr32796",
"Actr32801", "Actr32803", "Actr32881", "Actr33774", "Actr33776",
"Actr33778", "Actr33842", "Actr33843", "Actr33844", "Actr3603",
"Actr40014", "Actr40020", "Actr42312", "Actr43671", "Actr43766",
"Actr44240", "Actr44241", "Actr44247", "Actr44251", "Actr44252",
"Actr44254", "Actr44255", "Actr44258", "Actr44259", "Actr46745",
"Actr47687", "Actr47715", "Actr47717", "Actr47718", "Actr47720",
"Actr47780", "Actr47783", "Actr47785", "Actr47787", "Actr47790",
"Actr47793", "Actr47796", "Actr47797", "Actr47803", "Actr47807",
"Actr47817", "Actr47818", "Actr47822", "Actr47823", "Actr47824",
"Actr47826", "Actr47828", "Actr47829", "Actr48681", "Actr48789",
"Actr48806", "Actr48809", "Actr48810", "Actr48811", "Actr48813",
"Actr48815", "Actr48861", "Actr48978", "Actr48979", "Actr48981",
"Actr48982", "Actr49053", "Actr49058", "Actr49098", "Actr49101",
"Actr49110", "Actr49116", "Actr49119", "Actr49120", "Actr49186",
"Actr49188", "Actr49189", "Actr49191", "Actr49192", "Actr49196",
"Actr49197", "Actr49199", "Actr49200", "Actr49201", "Actr49202",
"Actr49222", "Actr49224", "Actr49231", "Actr49232", "Actr49234",
"Actr49235", "Actr49236", "Actr49266", "Actr49267", "Actr49271",
"Actr49280", "Actr49424", "Actr49434", "Actr49435", "Actr49436",
"Actr49449", "Actr49452", "Actr49562", "Actr49564", "Actr49567",
"Actr49572", "Actr49573", "Actr49574", "Actr49606", "Actr49608",
"Actr49609", "Actr49611", "Actr49612", "Actr49614", "Actr49615",
"Actr49631", "Actr49634", "Actr49638", "Actr49639", "Actr49644",
"Actr49646", "Actr49649", "Actr49650", "Actr49651", "Actr49652",
"Actr49656", "Actr49658", "Actr49662", "Actr49667", "Actr49668",
"Actr49670", "Actr49672", "Actr49784", "Actr49786", "Actr49787",
"Actr49789", "Actr49794", "Actr49796", "Actr49808", "Actr49810",
"Actr49812", "Actr49815", "Actr49822", "Actr49828", "Actr49838",
"Actr49839", "Actr49840", "Actr49844", "Actr49846", "Actr49847",
"Actr49851", "Actr49852", "Actr49853", "Actr49854", "Actr49858",
"Actr49860", "Actr49863", "Actr49864", "Actr49866", "Actr49869",
"Actr49870", "Actr49871", "Actr49875", "Actr49876", "Actr49877",
"Actr49878", "Actr49879", "Actr49882", "Actr49883", "Actr49884",
"Actr49885", "Actr49886", "Actr49888", "Actr49889", "Actr49892",
"Actr49893", "Actr49894", "Actr49895", "Actr49896", "Actr49897",
"Actr49898", "Actr49899", "Actr49900", "Actr49901", "Actr50120",
"Actr50122", "Actr50123", "Actr50125", "Actr50126", "Actr50129",
"Actr50130", "Actr50131", "Actr50133", "Actr50134", "Actr50135",
"Actr50137", "Actr50138", "Actr50143", "Actr50148", "Actr50149",
"Actr50151", "Actr50152", "Actr50154", "Actr50155", "Actr50156",
"Actr50173", "Actr50175", "Actr50182", "Actr50184", "Actr50187",
"Actr50188", "Actr50191", "Actr50192", "Actr50194", "Actr50195",
"Actr50200", "Actr50202", "Actr50203", "Actr50204", "Actr50206",
"Actr50209", "Actr50211", "Actr50212", "Actr50219", "Actr50231",
"Actr50232", "Actr50239", "Actr50240", "Actr50241", "Actr50243",
"Actr50246", "Actr50247", "Actr50249", "Actr50255", "Actr50256",
"Actr50258", "Actr50263", "Actr50265", "Actr50272", "Actr50275",
"Actr50277", "Actr50279", "Actr50281", "Actr50283", "Actr50284",
"Actr50285", "Actr50286", "Actr50287", "Actr50288", "Actr50289",
"Actr50290", "Actr50291", "Actr50292", "Actr50293", "Actr50294",
"Actr50298", "Actr50552", "Actr50556", "Actr50558", "Actr50559",
"Actr50562", "Actr50568", "Actr50605", "Actr50608", "Actr50610",
"Actr50625", "Actr50627", "Actr50630", "Actr50631", "Actr50759",
"Actr50776", "Actr50778"), class = "factor"), Actr22509 = c("1",
"NA", "NA", "NA", "NA"), Actr22510 = c("1", "NA", "NA", "NA",
"NA"), Actr22511 = c("0", "NA", "NA", "NA", "NA"), Actr22955 = c("NA",
"NA", "1", "NA", "NA")), .Names = c("X", "Actr22509", "Actr22510",
"Actr22511", "Actr22955"), row.names = c(NA, 5L), class = "data.frame")
我为此寻找过一个好的/较新的重复问题(dupe),但没有找到合适的(可能是因为标题信息量不足),所以这里给出 3 种处理这种情况的常见方法:
使用 reshape 的 Base R 方法。这是一个相当繁琐的解决方案,由于性能和复杂性的原因,在这种情况下通常不推荐。我还建议使用 row.names(Res) <- NULL 删除结果的行名称(此处 Res 应指 reshape 的返回结果)。
# Base R wide-to-long reshape of `df`:
#   idvar     - "X" identifies each original row
#   varying   - columns 2 to 5 are the wide columns stacked into one column
#               (2:5 matches the 5-column sample; adjust for the full data)
#   v.names   - name of the stacked value column ("value")
#   timevar   - name of the column recording which wide column a value came from
#   times     - labels stored in that column: the column names minus the first
#   direction - "long" requests the wide-to-long conversion
# The result has composite row names of the form <X>.<variable>.
reshape(df,
idvar = "X",
varying = list(2:5),
v.names = "value",
timevar = "variable",
times = names(df)[-1],
direction = "long")
# X variable value
# Actr22511.Actr22509 Actr22511 Actr22509 1
# Actr28440.Actr22509 Actr28440 Actr22509 NA
# Actr28464.Actr22509 Actr28464 Actr22509 NA
# Actr28604.Actr22509 Actr28604 Actr22509 NA
# Actr30119.Actr22509 Actr30119 Actr22509 NA
...
reshape2 方法
# reshape2: melt `df` keeping "X" as the id column; every other column is
# stacked into a `variable` (source column name) / `value` pair.
library(reshape2)
melt(df, "X")
# X variable value
# 1 Actr22511 Actr22509 1
# 2 Actr28440 Actr22509 NA
# 3 Actr28464 Actr22509 NA
# 4 Actr28604 Actr22509 NA
...
以及新近流行的 tidyr 方法(有人能向我解释一下,它与 melt 相比简单/好在哪里吗?)
# tidyr: gather every column except X into `variable` / `value` pairs.
# NOTE: gather() is superseded in current tidyr; the replacement is
# pivot_longer(df, -X, names_to = "variable", values_to = "value"), which
# returns the same records but ordered row by row instead of column by column.
library(tidyr)
gather(df, variable, value, -X)
# X variable value
# 1 Actr22511 Actr22509 1
# 2 Actr28440 Actr22509 NA
# 3 Actr28464 Actr22509 NA
# 4 Actr28604 Actr22509 NA
...
我有一个 302 obs. of 942 variables
的数据框。看起来像这样:
[Actr225009] [Actr225423] [Actr229853] [Actr78542]
[Actr225009] 0 NA 3 NA
[Actr225423] NA 0 1 5
[Actr229853] 8 NA 0 2
...
并且需要为每一行重组它:
Actr225009 Actr225009 0
Actr225009 Actr225423 NA
Actr225009 Actr229853 3
Actr225009 Actr78542 NA ...
我运行了这段代码:
# Build the long (row label, column label, value) table one cell at a time.
# NOTE: this is the slow version being asked about: `dist.acteurs` starts
# empty and is grown with rbind() once per cell, so the accumulated result
# is copied again on every iteration.
dist.acteurs<-data.frame()
# Progress bar whose maximum is the number of rows of `test`.
pb <- txtProgressBar(min = 0, max = length(test[,1]), style = 3)
for(i in 2: length(test[,1])){# iterate over the rows (starts at 2, so row 1 is not emitted)
for(j in 2: length(test[1,])){# iterate over the columns (starts at 2, so column 1 is not emitted)
# One output record: the label in column 1 of row i, the label read from
# row 1 of column j, and the cell value, all coerced to character.
# NOTE(review): assumes row 1 of `test` holds the column labels; confirm.
tps<-c(as.character(test[i,1]),as.character(test[1,j]),as.character(test[i,j]))
# Turn the 3-element vector into a 1 x 3 row so it can be row-bound.
tps<-t(as.data.frame(tps))
dist.acteurs<-rbind(dist.acteurs,tps)
}
# Advance the progress bar once per completed row.
setTxtProgressBar(pb, i)
}
close(pb)
但是运行它需要一整天的时间,我想知道是否可以使用 lapply 或其他更高效的方法。
编辑:
>dput(test[1:5,1:5])
structure(list(X = structure(1:5, .Label = c("Actr22511", "Actr28440",
"Actr28464", "Actr28604", "Actr30119", "Actr30817", "Actr30819",
"Actr30821", "Actr30822", "Actr30934", "Actr31331", "Actr31332",
"Actr31349", "Actr31369", "Actr32128", "Actr32178", "Actr32190",
"Actr32207", "Actr32208", "Actr32209", "Actr32223", "Actr32232",
"Actr32233", "Actr32234", "Actr32265", "Actr32273", "Actr32274",
"Actr32275", "Actr32510", "Actr32533", "Actr32534", "Actr32796",
"Actr32801", "Actr32803", "Actr32881", "Actr33774", "Actr33776",
"Actr33778", "Actr33842", "Actr33843", "Actr33844", "Actr3603",
"Actr40014", "Actr40020", "Actr42312", "Actr43671", "Actr43766",
"Actr44240", "Actr44241", "Actr44247", "Actr44251", "Actr44252",
"Actr44254", "Actr44255", "Actr44258", "Actr44259", "Actr46745",
"Actr47687", "Actr47715", "Actr47717", "Actr47718", "Actr47720",
"Actr47780", "Actr47783", "Actr47785", "Actr47787", "Actr47790",
"Actr47793", "Actr47796", "Actr47797", "Actr47803", "Actr47807",
"Actr47817", "Actr47818", "Actr47822", "Actr47823", "Actr47824",
"Actr47826", "Actr47828", "Actr47829", "Actr48681", "Actr48789",
"Actr48806", "Actr48809", "Actr48810", "Actr48811", "Actr48813",
"Actr48815", "Actr48861", "Actr48978", "Actr48979", "Actr48981",
"Actr48982", "Actr49053", "Actr49058", "Actr49098", "Actr49101",
"Actr49110", "Actr49116", "Actr49119", "Actr49120", "Actr49186",
"Actr49188", "Actr49189", "Actr49191", "Actr49192", "Actr49196",
"Actr49197", "Actr49199", "Actr49200", "Actr49201", "Actr49202",
"Actr49222", "Actr49224", "Actr49231", "Actr49232", "Actr49234",
"Actr49235", "Actr49236", "Actr49266", "Actr49267", "Actr49271",
"Actr49280", "Actr49424", "Actr49434", "Actr49435", "Actr49436",
"Actr49449", "Actr49452", "Actr49562", "Actr49564", "Actr49567",
"Actr49572", "Actr49573", "Actr49574", "Actr49606", "Actr49608",
"Actr49609", "Actr49611", "Actr49612", "Actr49614", "Actr49615",
"Actr49631", "Actr49634", "Actr49638", "Actr49639", "Actr49644",
"Actr49646", "Actr49649", "Actr49650", "Actr49651", "Actr49652",
"Actr49656", "Actr49658", "Actr49662", "Actr49667", "Actr49668",
"Actr49670", "Actr49672", "Actr49784", "Actr49786", "Actr49787",
"Actr49789", "Actr49794", "Actr49796", "Actr49808", "Actr49810",
"Actr49812", "Actr49815", "Actr49822", "Actr49828", "Actr49838",
"Actr49839", "Actr49840", "Actr49844", "Actr49846", "Actr49847",
"Actr49851", "Actr49852", "Actr49853", "Actr49854", "Actr49858",
"Actr49860", "Actr49863", "Actr49864", "Actr49866", "Actr49869",
"Actr49870", "Actr49871", "Actr49875", "Actr49876", "Actr49877",
"Actr49878", "Actr49879", "Actr49882", "Actr49883", "Actr49884",
"Actr49885", "Actr49886", "Actr49888", "Actr49889", "Actr49892",
"Actr49893", "Actr49894", "Actr49895", "Actr49896", "Actr49897",
"Actr49898", "Actr49899", "Actr49900", "Actr49901", "Actr50120",
"Actr50122", "Actr50123", "Actr50125", "Actr50126", "Actr50129",
"Actr50130", "Actr50131", "Actr50133", "Actr50134", "Actr50135",
"Actr50137", "Actr50138", "Actr50143", "Actr50148", "Actr50149",
"Actr50151", "Actr50152", "Actr50154", "Actr50155", "Actr50156",
"Actr50173", "Actr50175", "Actr50182", "Actr50184", "Actr50187",
"Actr50188", "Actr50191", "Actr50192", "Actr50194", "Actr50195",
"Actr50200", "Actr50202", "Actr50203", "Actr50204", "Actr50206",
"Actr50209", "Actr50211", "Actr50212", "Actr50219", "Actr50231",
"Actr50232", "Actr50239", "Actr50240", "Actr50241", "Actr50243",
"Actr50246", "Actr50247", "Actr50249", "Actr50255", "Actr50256",
"Actr50258", "Actr50263", "Actr50265", "Actr50272", "Actr50275",
"Actr50277", "Actr50279", "Actr50281", "Actr50283", "Actr50284",
"Actr50285", "Actr50286", "Actr50287", "Actr50288", "Actr50289",
"Actr50290", "Actr50291", "Actr50292", "Actr50293", "Actr50294",
"Actr50298", "Actr50552", "Actr50556", "Actr50558", "Actr50559",
"Actr50562", "Actr50568", "Actr50605", "Actr50608", "Actr50610",
"Actr50625", "Actr50627", "Actr50630", "Actr50631", "Actr50759",
"Actr50776", "Actr50778"), class = "factor"), Actr22509 = c("1",
"NA", "NA", "NA", "NA"), Actr22510 = c("1", "NA", "NA", "NA",
"NA"), Actr22511 = c("0", "NA", "NA", "NA", "NA"), Actr22955 = c("NA",
"NA", "1", "NA", "NA")), .Names = c("X", "Actr22509", "Actr22510",
"Actr22511", "Actr22955"), row.names = c(NA, 5L), class = "data.frame")
我为此寻找过一个好的/较新的重复问题(dupe),但没有找到合适的(可能是因为标题信息量不足),所以这里给出 3 种处理这种情况的常见方法:
使用 reshape 的 Base R 方法。这是一个相当繁琐的解决方案,由于性能和复杂性的原因,在这种情况下通常不推荐。我还建议使用 row.names(Res) <- NULL 删除结果的行名称(此处 Res 应指 reshape 的返回结果)。
# Base R wide-to-long reshape of `df`:
#   idvar     - "X" identifies each original row
#   varying   - columns 2 to 5 are the wide columns stacked into one column
#               (2:5 matches the 5-column sample; adjust for the full data)
#   v.names   - name of the stacked value column ("value")
#   timevar   - name of the column recording which wide column a value came from
#   times     - labels stored in that column: the column names minus the first
#   direction - "long" requests the wide-to-long conversion
# The result has composite row names of the form <X>.<variable>.
reshape(df,
idvar = "X",
varying = list(2:5),
v.names = "value",
timevar = "variable",
times = names(df)[-1],
direction = "long")
# X variable value
# Actr22511.Actr22509 Actr22511 Actr22509 1
# Actr28440.Actr22509 Actr28440 Actr22509 NA
# Actr28464.Actr22509 Actr28464 Actr22509 NA
# Actr28604.Actr22509 Actr28604 Actr22509 NA
# Actr30119.Actr22509 Actr30119 Actr22509 NA
...
reshape2 方法
# reshape2: melt `df` keeping "X" as the id column; every other column is
# stacked into a `variable` (source column name) / `value` pair.
library(reshape2)
melt(df, "X")
# X variable value
# 1 Actr22511 Actr22509 1
# 2 Actr28440 Actr22509 NA
# 3 Actr28464 Actr22509 NA
# 4 Actr28604 Actr22509 NA
...
以及新近流行的 tidyr 方法(有人能向我解释一下,它与 melt 相比简单/好在哪里吗?)
# tidyr: gather every column except X into `variable` / `value` pairs.
# NOTE: gather() is superseded in current tidyr; the replacement is
# pivot_longer(df, -X, names_to = "variable", values_to = "value"), which
# returns the same records but ordered row by row instead of column by column.
library(tidyr)
gather(df, variable, value, -X)
# X variable value
# 1 Actr22511 Actr22509 1
# 2 Actr28440 Actr22509 NA
# 3 Actr28464 Actr22509 NA
# 4 Actr28604 Actr22509 NA
...