如何在 R 中将列表的列表转换为整洁的 tibble 或 data.frame
How to convert list of list into tidy tibble or data.frame in R
我有以下列表:
# Example nested list with one entry per sample in each component:
#   coolfactor_score - list of named numeric vectors (one value per cell type)
#   sr_crt           - list of lists holding the scalars `crt` and `sr`
#   sample_names     - character vector of sample labels
my_lol <- list(
  coolfactor_score = list(
    c(
      B = 0.164477631065473,
      Mac = 0.198253819406019,
      NK = 0.396414447052519,
      Neu = 0.133118603987442,
      Stro = 0.107735498488546
    ),
    c(
      B = 0.186215537135912,
      Mac = 0.18408529174803,
      NK = 0.375349920115798,
      Neu = 0.247664923324821,
      Stro = 0.006684327675438
    )
  ),
  sr_crt = list(
    list(crt = 0.133118603987442, sr = 0.407076876403305),
    list(crt = 0.18408529174803, sr = 0.0829181742326453)
  ),
  sample_names = c("Sample1", "Sample2")
)
看起来像这样:
> my_lol
$coolfactor_score
$coolfactor_score[[1]]
B Mac NK Neu Stro
0.1644776 0.1982538 0.3964144 0.1331186 0.1077355
$coolfactor_score[[2]]
B Mac NK Neu Stro
0.186215537 0.184085292 0.375349920 0.247664923 0.006684328
$sr_crt
$sr_crt[[1]]
$sr_crt[[1]]$crt
[1] 0.1331186
$sr_crt[[1]]$sr
[1] 0.4070769
$sr_crt[[2]]
$sr_crt[[2]]$crt
[1] 0.1840853
$sr_crt[[2]]$sr
[1] 0.08291817
$sample_names
[1] "Sample1" "Sample2"
# Note that the number of samples can be more than 2 and cell type more than 5.
如何将其整理成如下的数据框(tibble)?
CellType Sample CoolFactorScore SR CRT
B Sample1 0.1644776 0.4070769 0.1331186
Mac Sample1 0.1982538 0.4070769 0.1331186
NK Sample1 0.3964144 0.4070769 0.1331186
Neu Sample1 0.1331186 0.4070769 0.1331186
Stro Sample1 0.1077355 0.4070769 0.1331186
B Sample2 0.186215537 0.08291817 0.1840853
Mac Sample2 0.184085292 0.08291817 0.1840853
NK Sample2 0.375349920 0.08291817 0.1840853
Neu Sample2 0.247664923 0.08291817 0.1840853
Stro Sample2 0.006684328 0.08291817 0.1840853
一种使用基础 R 的方法:
# Build one data frame per sample, then stack them.
# Iterating over seq_along(sample_names) instead of a fixed 1:2 lets the same
# code handle any number of samples.
mylist <- lapply(seq_along(my_lol$sample_names), function(i) {
  # Take the i-th element of every component of my_lol, i.e. everything that
  # belongs to sample i, and bind those pieces into one data frame.
  df <- data.frame(lapply(my_lol, "[", i))
  names(df) <- c("CoolFactorScore", "CRT", "SR", "Sample")
  # The cell types arrive as row names; move them into a regular column.
  df$CellType <- rownames(df)
  row.names(df) <- NULL
  df
})
do.call(rbind, mylist)
输出:
CoolFactorScore CRT SR Sample CellType
1 0.164477631 0.1331186 0.40707688 Sample1 B
2 0.198253819 0.1331186 0.40707688 Sample1 Mac
3 0.396414447 0.1331186 0.40707688 Sample1 NK
4 0.133118604 0.1331186 0.40707688 Sample1 Neu
5 0.107735498 0.1331186 0.40707688 Sample1 Stro
6 0.186215537 0.1840853 0.08291817 Sample2 B
7 0.184085292 0.1840853 0.08291817 Sample2 Mac
8 0.375349920 0.1840853 0.08291817 Sample2 NK
9 0.247664923 0.1840853 0.08291817 Sample2 Neu
10 0.006684328 0.1840853 0.08291817 Sample2 Stro
这里有一个不太优雅的方法:
# Build one data frame per sample from its score vector, its crt/sr scalars
# and its sample label, then stack the per-sample frames.
# seq_along() replaces the fixed 1:2 so any number of samples is handled.
int <- lapply(seq_along(my_lol[[3]]), function(x) {
  do.call(
    data.frame,
    c(
      list(CoolFactorScore = my_lol[[1]][[x]]),
      my_lol[[2]][[x]],
      list(Sample = my_lol[[3]][[x]])
    )
  )
})
do.call(rbind, int)
CoolFactorScore crt sr Sample
B 0.164477631 0.1331186 0.40707688 Sample1
Mac 0.198253819 0.1331186 0.40707688 Sample1
NK 0.396414447 0.1331186 0.40707688 Sample1
Neu 0.133118604 0.1331186 0.40707688 Sample1
Stro 0.107735498 0.1331186 0.40707688 Sample1
B1 0.186215537 0.1840853 0.08291817 Sample2
Mac1 0.184085292 0.1840853 0.08291817 Sample2
NK1 0.375349920 0.1840853 0.08291817 Sample2
Neu1 0.247664923 0.1840853 0.08291817 Sample2
Stro1 0.006684328 0.1840853 0.08291817 Sample2
这是一个没有循环的解决方案,使用了 data.table 包的功能。
library(data.table)
第 1 步:展开列表
# Flatten the nested list into a single named vector.
tmp1 <- unlist(my_lol)
第 2 步:转置并转换为 data.table
这样您将获得可以由原始数据组成的最宽的 table 。它应该根据要求(在进一步的步骤中)转换为 long table。
# Transpose the flattened vector into a single row and convert it to a
# data.table (one column per element of tmp1).
tmp2 <- as.data.table(t(tmp1))
第三步:需要将'sample_names1'和'sample_names2'手动转换为'Sample'。
如果您想泛化到多个 sample_names 值,那么您应该根据可能值的语法修改此步骤。(此版本适用于此类 sample_names 值语法如:'Sample1'、'Sample2'、'Sample3' 等等。)
# Collapse sample_names1, sample_names2, ... into the single name "Sample".
# The backslash must be doubled: "\d" is not a valid escape in an R string
# literal and fails at parse time.
names(tmp2) <- gsub("sample_names\\d+", "Sample", names(tmp2))
第四步:根据tmp2的字段名创建度量字段名table
# Distinct column names of tmp2, in order of first appearance.
measure <- names(tmp2)[!duplicated(names(tmp2))]
第 5 步:从宽 table (tmp2)
创建更长的 table (tmp3)
# Melt the wide one-row table into a longer one: every entry of `measure` is
# passed to patterns() as a regular expression selecting the columns to stack,
# and the same entries are reused as the names of the resulting value columns.
# NOTE(review): presumably this yields one row per sample - confirm against
# the installed data.table version.
tmp3 <- melt(tmp2,
measure.vars = patterns(measure),
value.name = measure)
第 6 步:根据要求重命名列
# Strip the "coolfactor_score." and "sr_crt." prefixes from the column names,
# then upper-case the two sr_crt columns.
names(tmp3) <- gsub("sr_crt.", "", gsub("coolfactor_score.", "", names(tmp3)))
setnames(tmp3, c("crt", "sr"), c("CRT", "SR"))
第 7 步:从 tmp3
创建更长的 table (mylist)
# Reshape tmp3 so that every cell type becomes its own row.
# Sample, CRT and SR identify a row. Every remaining column, except the
# `variable` index added by the previous melt(), is treated as a cell type,
# so inputs with more (or differently named) cell types work unchanged.
mylist <- melt(
  tmp3,
  id.vars = c("Sample", "CRT", "SR"),
  measure.vars = setdiff(names(tmp3), c("variable", "Sample", "CRT", "SR")),
  value.name = "CoolFactorScore",
  variable.name = "CellType"
)
# unlist() on the original list coerces every value to character (because
# sample_names is character), so convert the score columns back to numeric.
mylist[
  ,
  c("CoolFactorScore", "SR", "CRT") := lapply(.SD, as.numeric),
  .SDcols = c("CoolFactorScore", "SR", "CRT")
]
第 8 步:根据要求对列重新排序
# Put the columns in the requested order (modifies mylist by reference).
setcolorder(
  mylist,
  c("CellType", "Sample", "CoolFactorScore", "SR", "CRT")
)
第 9 步:根据请求重新排序行
# Sort rows by sample first, then by cell type within each sample.
mylist <- mylist[order(Sample, CellType), ]
我有以下列表:
# Example nested list with one entry per sample in each component:
#   coolfactor_score - list of named numeric vectors (one value per cell type)
#   sr_crt           - list of lists holding the scalars `crt` and `sr`
#   sample_names     - character vector of sample labels
my_lol <- list(
  coolfactor_score = list(
    c(
      B = 0.164477631065473,
      Mac = 0.198253819406019,
      NK = 0.396414447052519,
      Neu = 0.133118603987442,
      Stro = 0.107735498488546
    ),
    c(
      B = 0.186215537135912,
      Mac = 0.18408529174803,
      NK = 0.375349920115798,
      Neu = 0.247664923324821,
      Stro = 0.006684327675438
    )
  ),
  sr_crt = list(
    list(crt = 0.133118603987442, sr = 0.407076876403305),
    list(crt = 0.18408529174803, sr = 0.0829181742326453)
  ),
  sample_names = c("Sample1", "Sample2")
)
看起来像这样:
> my_lol
$coolfactor_score
$coolfactor_score[[1]]
B Mac NK Neu Stro
0.1644776 0.1982538 0.3964144 0.1331186 0.1077355
$coolfactor_score[[2]]
B Mac NK Neu Stro
0.186215537 0.184085292 0.375349920 0.247664923 0.006684328
$sr_crt
$sr_crt[[1]]
$sr_crt[[1]]$crt
[1] 0.1331186
$sr_crt[[1]]$sr
[1] 0.4070769
$sr_crt[[2]]
$sr_crt[[2]]$crt
[1] 0.1840853
$sr_crt[[2]]$sr
[1] 0.08291817
$sample_names
[1] "Sample1" "Sample2"
# Note that the number of samples can be more than 2 and cell type more than 5.
如何将其整理成如下的数据框(tibble)?
CellType Sample CoolFactorScore SR CRT
B Sample1 0.1644776 0.4070769 0.1331186
Mac Sample1 0.1982538 0.4070769 0.1331186
NK Sample1 0.3964144 0.4070769 0.1331186
Neu Sample1 0.1331186 0.4070769 0.1331186
Stro Sample1 0.1077355 0.4070769 0.1331186
B Sample2 0.186215537 0.08291817 0.1840853
Mac Sample2 0.184085292 0.08291817 0.1840853
NK Sample2 0.375349920 0.08291817 0.1840853
Neu Sample2 0.247664923 0.08291817 0.1840853
Stro Sample2 0.006684328 0.08291817 0.1840853
一种使用基础 R 的方法:
# Build one data frame per sample, then stack them.
# Iterating over seq_along(sample_names) instead of a fixed 1:2 lets the same
# code handle any number of samples.
mylist <- lapply(seq_along(my_lol$sample_names), function(i) {
  # Take the i-th element of every component of my_lol, i.e. everything that
  # belongs to sample i, and bind those pieces into one data frame.
  df <- data.frame(lapply(my_lol, "[", i))
  names(df) <- c("CoolFactorScore", "CRT", "SR", "Sample")
  # The cell types arrive as row names; move them into a regular column.
  df$CellType <- rownames(df)
  row.names(df) <- NULL
  df
})
do.call(rbind, mylist)
输出:
CoolFactorScore CRT SR Sample CellType
1 0.164477631 0.1331186 0.40707688 Sample1 B
2 0.198253819 0.1331186 0.40707688 Sample1 Mac
3 0.396414447 0.1331186 0.40707688 Sample1 NK
4 0.133118604 0.1331186 0.40707688 Sample1 Neu
5 0.107735498 0.1331186 0.40707688 Sample1 Stro
6 0.186215537 0.1840853 0.08291817 Sample2 B
7 0.184085292 0.1840853 0.08291817 Sample2 Mac
8 0.375349920 0.1840853 0.08291817 Sample2 NK
9 0.247664923 0.1840853 0.08291817 Sample2 Neu
10 0.006684328 0.1840853 0.08291817 Sample2 Stro
这里有一个不太优雅的方法:
# Build one data frame per sample from its score vector, its crt/sr scalars
# and its sample label, then stack the per-sample frames.
# seq_along() replaces the fixed 1:2 so any number of samples is handled.
int <- lapply(seq_along(my_lol[[3]]), function(x) {
  do.call(
    data.frame,
    c(
      list(CoolFactorScore = my_lol[[1]][[x]]),
      my_lol[[2]][[x]],
      list(Sample = my_lol[[3]][[x]])
    )
  )
})
do.call(rbind, int)
CoolFactorScore crt sr Sample
B 0.164477631 0.1331186 0.40707688 Sample1
Mac 0.198253819 0.1331186 0.40707688 Sample1
NK 0.396414447 0.1331186 0.40707688 Sample1
Neu 0.133118604 0.1331186 0.40707688 Sample1
Stro 0.107735498 0.1331186 0.40707688 Sample1
B1 0.186215537 0.1840853 0.08291817 Sample2
Mac1 0.184085292 0.1840853 0.08291817 Sample2
NK1 0.375349920 0.1840853 0.08291817 Sample2
Neu1 0.247664923 0.1840853 0.08291817 Sample2
Stro1 0.006684328 0.1840853 0.08291817 Sample2
这是一个没有循环的解决方案,使用了 data.table 包的功能。
library(data.table)
第 1 步:展开列表
# Flatten the nested list into a single named vector.
tmp1 <- unlist(my_lol)
第 2 步:转置并转换为 data.table
这样您将获得可以由原始数据组成的最宽的 table 。它应该根据要求(在进一步的步骤中)转换为 long table。
# Transpose the flattened vector into a single row and convert it to a
# data.table (one column per element of tmp1).
tmp2 <- as.data.table(t(tmp1))
第三步:需要将'sample_names1'和'sample_names2'手动转换为'Sample'。
如果您想泛化到多个 sample_names 值,那么您应该根据可能值的语法修改此步骤。(此版本适用于此类 sample_names 值语法如:'Sample1'、'Sample2'、'Sample3' 等等。)
# Collapse sample_names1, sample_names2, ... into the single name "Sample".
# The backslash must be doubled: "\d" is not a valid escape in an R string
# literal and fails at parse time.
names(tmp2) <- gsub("sample_names\\d+", "Sample", names(tmp2))
第四步:根据tmp2的字段名创建度量字段名table
# Distinct column names of tmp2, in order of first appearance.
measure <- names(tmp2)[!duplicated(names(tmp2))]
第 5 步:从宽 table (tmp2) 创建更长的 table (tmp3)
# Melt the wide one-row table into a longer one: every entry of `measure` is
# passed to patterns() as a regular expression selecting the columns to stack,
# and the same entries are reused as the names of the resulting value columns.
# NOTE(review): presumably this yields one row per sample - confirm against
# the installed data.table version.
tmp3 <- melt(tmp2,
measure.vars = patterns(measure),
value.name = measure)
第 6 步:根据要求重命名列
# Strip the "coolfactor_score." and "sr_crt." prefixes from the column names,
# then upper-case the two sr_crt columns.
names(tmp3) <- gsub("sr_crt.", "", gsub("coolfactor_score.", "", names(tmp3)))
setnames(tmp3, c("crt", "sr"), c("CRT", "SR"))
第 7 步:从 tmp3 创建更长的 table (mylist)
# Reshape tmp3 so that every cell type becomes its own row.
# Sample, CRT and SR identify a row. Every remaining column, except the
# `variable` index added by the previous melt(), is treated as a cell type,
# so inputs with more (or differently named) cell types work unchanged.
mylist <- melt(
  tmp3,
  id.vars = c("Sample", "CRT", "SR"),
  measure.vars = setdiff(names(tmp3), c("variable", "Sample", "CRT", "SR")),
  value.name = "CoolFactorScore",
  variable.name = "CellType"
)
# unlist() on the original list coerces every value to character (because
# sample_names is character), so convert the score columns back to numeric.
mylist[
  ,
  c("CoolFactorScore", "SR", "CRT") := lapply(.SD, as.numeric),
  .SDcols = c("CoolFactorScore", "SR", "CRT")
]
第 8 步:根据要求对列重新排序
# Put the columns in the requested order (modifies mylist by reference).
setcolorder(
  mylist,
  c("CellType", "Sample", "CoolFactorScore", "SR", "CRT")
)
第 9 步:根据请求重新排序行
# Sort rows by sample first, then by cell type within each sample.
mylist <- mylist[order(Sample, CellType), ]