如何将 tibble 中的行乘以另一个 tibble 中的另一个对应行
How to multiply rows in a tibble to another corresponding rows in another tibble
我有两个问题,第一个是这个。
input_data <- tibble::tribble(
# Number of samples can be more than 2.
# Number of genes around 24K
~Genes, ~Sample1, ~Sample2,
"Ncr1", 8.2, 10.10,
"Il1f9", 3.2, 20.30,
"Stfa2l1", 2.3, 0.3,
"Klra10", 5.5, 12.0,
"Dcn", 1.8, 0,
"Cxcr2", 1.3, 1.1,
"Foo", 20, 70
)
input_data
#> # A tibble: 7 × 3
#> Genes Sample1 Sample2
#> <chr> <dbl> <dbl>
#> 1 Ncr1 8.2 10.1
#> 2 Il1f9 3.2 20.3
#> 3 Stfa2l1 2.3 0.3
#> 4 Klra10 5.5 12.0
#> 5 Dcn 1.8 0.0
#> 6 Cxcr2 1.3 1.1
#> 7 Foo 20.0 70.0
第二个是这个,
fixed_score <- tibble::tribble(
# Number of non genes column can be more than 5.
~Genes, ~B, ~Mac, ~NK, ~Neu, ~Stro,
"Ncr1", 0.087, 0.151, 0.495, 0.002, 0.004,
"Il1f9", 0.154, 0.099, 0.002, 0.333, 0.005,
"Stfa2l1", 0.208, 0.111, 0.002, 0.332, 0.005,
"Klra10", 0.085, 0.139, 0.496, 0.001, 0.004,
"Dcn", 0.132, 0.358, 0.003, 0.003, 0.979,
"Cxcr2", 0.132, 0.358, 0.003, 0.003, 0.979
)
fixed_score
#> # A tibble: 6 × 6
#> Genes B Mac NK Neu Stro
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Ncr1 0.087 0.151 0.495 0.002 0.004
#> 2 Il1f9 0.154 0.099 0.002 0.333 0.005
#> 3 Stfa2l1 0.208 0.111 0.002 0.332 0.005
#> 4 Klra10 0.085 0.139 0.496 0.001 0.004
#> 5 Dcn 0.132 0.358 0.003 0.003 0.979
#> 6 Cxcr2 0.132 0.358 0.003 0.003 0.979
我想做的是将每个 Sample1
(和 Sample2
)的值相乘
fixed_score
.
中相应的基因行值
为 Sample1
生成这个
B Mac NK Neu Stro
Ncr1 0.7134 1.2382 4.0590 0.0164 0.0328
Il1f9 0.4928 0.3168 0.0064 1.0656 0.0160
Stfa2l1 0.4784 0.2553 0.0046 0.7636 0.0115
Klra10 0.4675 0.7645 2.7280 0.0055 0.0220
Dcn 0.2376 0.6444 0.0054 0.0054 1.7622
Cxcr2 0.1716 0.4654 0.0039 0.0039 1.2727
因此,在上面的结果中,我们通过以下方式获得值:
Ncr1 (sample1) x Ncr1 (fixed_score B) = 8.2 x 0.87 = 7.134
Il1f9 (sample1) x Il1f9 (fixed_score B) = 3.2 x 0.154 = 0.493
Sample2
的结果是这样的:
B Mac NK Neu Stro
Ncr1 0.8787 1.5251 4.9995 0.0202 0.0404
Il1f9 3.1262 2.0097 0.0406 6.7599 0.1015
Stfa2l1 0.0624 0.0333 0.0006 0.0996 0.0015
Klra10 1.0200 1.6680 5.9520 0.0120 0.0480
Dcn 0.0000 0.0000 0.0000 0.0000 0.0000
Cxcr2 0.1452 0.3938 0.0033 0.0033 1.0769
如何使用 data.table 或 dplyr 做到这一点?由于我们的行数
非常大。最好有快速的方法。
我们可以使用tidyverse
library(tidyverse)
input_data %>%
#remove the 'Genes' column
select(-matches("Genes")) %>%
#loop the other columns cbind with the Genes column
map(~bind_cols(input_data['Genes'], Sample=.)) %>%
#left join with 'fixed_score' dataset by 'Genes'
map(~left_join(fixed_score, ., by = "Genes")) %>%
#multiply the columns selected in 'vars' with 'Sample'
map(~mutate_at(., vars(B:Stro), funs(.*Sample))) %>%
#remove the 'Sample' column from the list of tibbles
map(~select(., -matches("Sample")))
#$Sample1
# A tibble: 6 × 6
# Genes B Mac NK Neu Stro
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 Ncr1 0.7134 1.2382 4.0590 0.0164 0.0328
#2 Il1f9 0.4928 0.3168 0.0064 1.0656 0.0160
#3 Stfa2l1 0.4784 0.2553 0.0046 0.7636 0.0115
#4 Klra10 0.4675 0.7645 2.7280 0.0055 0.0220
#5 Dcn 0.2376 0.6444 0.0054 0.0054 1.7622
#6 Cxcr2 0.1716 0.4654 0.0039 0.0039 1.2727
#$Sample2
# A tibble: 6 × 6
# Genes B Mac NK Neu Stro
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 Ncr1 0.8787 1.5251 4.9995 0.0202 0.0404
#2 Il1f9 3.1262 2.0097 0.0406 6.7599 0.1015
#3 Stfa2l1 0.0624 0.0333 0.0006 0.0996 0.0015
#4 Klra10 1.0200 1.6680 5.9520 0.0120 0.0480
#5 Dcn 0.0000 0.0000 0.0000 0.0000 0.0000
#6 Cxcr2 0.1452 0.3938 0.0033 0.0033 1.0769
如果您希望速度快,只需使用矩阵即可。
让我们创建您的矩阵(它们首先应该是怎样的)
input_mat <- as.matrix(input_data[-1])
row.names(input_mat) <- unlist(input_data[, 1])
fixed_mat <- as.matrix(fixed_score[-1])
row.names(fixed_mat) <- unlist(fixed_score[, 1])
然后,你可以简单地做
lapply(colnames(input_mat), function(x) input_mat[rownames(fixed_mat), x] * fixed_mat)
# [[1]]
# B Mac NK Neu Stro
# Ncr1 0.7134 1.2382 4.0590 0.0164 0.0328
# Il1f9 0.4928 0.3168 0.0064 1.0656 0.0160
# Stfa2l1 0.4784 0.2553 0.0046 0.7636 0.0115
# Klra10 0.4675 0.7645 2.7280 0.0055 0.0220
# Dcn 0.2376 0.6444 0.0054 0.0054 1.7622
# Cxcr2 0.1716 0.4654 0.0039 0.0039 1.2727
#
# [[2]]
# B Mac NK Neu Stro
# Ncr1 0.8787 1.5251 4.9995 0.0202 0.0404
# Il1f9 3.1262 2.0097 0.0406 6.7599 0.1015
# Stfa2l1 0.0624 0.0333 0.0006 0.0996 0.0015
# Klra10 1.0200 1.6680 5.9520 0.0120 0.0480
# Dcn 0.0000 0.0000 0.0000 0.0000 0.0000
# Cxcr2 0.1452 0.3938 0.0033 0.0033 1.0769
这应该很快
我有两个问题,第一个是这个。
input_data <- tibble::tribble(
# Number of samples can be more than 2.
# Number of genes around 24K
~Genes, ~Sample1, ~Sample2,
"Ncr1", 8.2, 10.10,
"Il1f9", 3.2, 20.30,
"Stfa2l1", 2.3, 0.3,
"Klra10", 5.5, 12.0,
"Dcn", 1.8, 0,
"Cxcr2", 1.3, 1.1,
"Foo", 20, 70
)
input_data
#> # A tibble: 7 × 3
#> Genes Sample1 Sample2
#> <chr> <dbl> <dbl>
#> 1 Ncr1 8.2 10.1
#> 2 Il1f9 3.2 20.3
#> 3 Stfa2l1 2.3 0.3
#> 4 Klra10 5.5 12.0
#> 5 Dcn 1.8 0.0
#> 6 Cxcr2 1.3 1.1
#> 7 Foo 20.0 70.0
第二个是这个,
fixed_score <- tibble::tribble(
# Number of non genes column can be more than 5.
~Genes, ~B, ~Mac, ~NK, ~Neu, ~Stro,
"Ncr1", 0.087, 0.151, 0.495, 0.002, 0.004,
"Il1f9", 0.154, 0.099, 0.002, 0.333, 0.005,
"Stfa2l1", 0.208, 0.111, 0.002, 0.332, 0.005,
"Klra10", 0.085, 0.139, 0.496, 0.001, 0.004,
"Dcn", 0.132, 0.358, 0.003, 0.003, 0.979,
"Cxcr2", 0.132, 0.358, 0.003, 0.003, 0.979
)
fixed_score
#> # A tibble: 6 × 6
#> Genes B Mac NK Neu Stro
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Ncr1 0.087 0.151 0.495 0.002 0.004
#> 2 Il1f9 0.154 0.099 0.002 0.333 0.005
#> 3 Stfa2l1 0.208 0.111 0.002 0.332 0.005
#> 4 Klra10 0.085 0.139 0.496 0.001 0.004
#> 5 Dcn 0.132 0.358 0.003 0.003 0.979
#> 6 Cxcr2 0.132 0.358 0.003 0.003 0.979
我想做的是将每个 Sample1
(和 Sample2
)的值相乘
fixed_score
.
为 Sample1
B Mac NK Neu Stro
Ncr1 0.7134 1.2382 4.0590 0.0164 0.0328
Il1f9 0.4928 0.3168 0.0064 1.0656 0.0160
Stfa2l1 0.4784 0.2553 0.0046 0.7636 0.0115
Klra10 0.4675 0.7645 2.7280 0.0055 0.0220
Dcn 0.2376 0.6444 0.0054 0.0054 1.7622
Cxcr2 0.1716 0.4654 0.0039 0.0039 1.2727
因此,在上面的结果中,我们通过以下方式获得值:
Ncr1 (sample1) x Ncr1 (fixed_score B) = 8.2 x 0.87 = 7.134
Il1f9 (sample1) x Il1f9 (fixed_score B) = 3.2 x 0.154 = 0.493
Sample2
的结果是这样的:
B Mac NK Neu Stro
Ncr1 0.8787 1.5251 4.9995 0.0202 0.0404
Il1f9 3.1262 2.0097 0.0406 6.7599 0.1015
Stfa2l1 0.0624 0.0333 0.0006 0.0996 0.0015
Klra10 1.0200 1.6680 5.9520 0.0120 0.0480
Dcn 0.0000 0.0000 0.0000 0.0000 0.0000
Cxcr2 0.1452 0.3938 0.0033 0.0033 1.0769
如何使用 data.table 或 dplyr 做到这一点?由于我们的行数 非常大。最好有快速的方法。
我们可以使用tidyverse
library(tidyverse)
input_data %>%
#remove the 'Genes' column
select(-matches("Genes")) %>%
#loop the other columns cbind with the Genes column
map(~bind_cols(input_data['Genes'], Sample=.)) %>%
#left join with 'fixed_score' dataset by 'Genes'
map(~left_join(fixed_score, ., by = "Genes")) %>%
#multiply the columns selected in 'vars' with 'Sample'
map(~mutate_at(., vars(B:Stro), funs(.*Sample))) %>%
#remove the 'Sample' column from the list of tibbles
map(~select(., -matches("Sample")))
#$Sample1
# A tibble: 6 × 6
# Genes B Mac NK Neu Stro
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 Ncr1 0.7134 1.2382 4.0590 0.0164 0.0328
#2 Il1f9 0.4928 0.3168 0.0064 1.0656 0.0160
#3 Stfa2l1 0.4784 0.2553 0.0046 0.7636 0.0115
#4 Klra10 0.4675 0.7645 2.7280 0.0055 0.0220
#5 Dcn 0.2376 0.6444 0.0054 0.0054 1.7622
#6 Cxcr2 0.1716 0.4654 0.0039 0.0039 1.2727
#$Sample2
# A tibble: 6 × 6
# Genes B Mac NK Neu Stro
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 Ncr1 0.8787 1.5251 4.9995 0.0202 0.0404
#2 Il1f9 3.1262 2.0097 0.0406 6.7599 0.1015
#3 Stfa2l1 0.0624 0.0333 0.0006 0.0996 0.0015
#4 Klra10 1.0200 1.6680 5.9520 0.0120 0.0480
#5 Dcn 0.0000 0.0000 0.0000 0.0000 0.0000
#6 Cxcr2 0.1452 0.3938 0.0033 0.0033 1.0769
如果您希望速度快,只需使用矩阵即可。
让我们创建您的矩阵(它们首先应该是怎样的)
input_mat <- as.matrix(input_data[-1])
row.names(input_mat) <- unlist(input_data[, 1])
fixed_mat <- as.matrix(fixed_score[-1])
row.names(fixed_mat) <- unlist(fixed_score[, 1])
然后,你可以简单地做
lapply(colnames(input_mat), function(x) input_mat[rownames(fixed_mat), x] * fixed_mat)
# [[1]]
# B Mac NK Neu Stro
# Ncr1 0.7134 1.2382 4.0590 0.0164 0.0328
# Il1f9 0.4928 0.3168 0.0064 1.0656 0.0160
# Stfa2l1 0.4784 0.2553 0.0046 0.7636 0.0115
# Klra10 0.4675 0.7645 2.7280 0.0055 0.0220
# Dcn 0.2376 0.6444 0.0054 0.0054 1.7622
# Cxcr2 0.1716 0.4654 0.0039 0.0039 1.2727
#
# [[2]]
# B Mac NK Neu Stro
# Ncr1 0.8787 1.5251 4.9995 0.0202 0.0404
# Il1f9 3.1262 2.0097 0.0406 6.7599 0.1015
# Stfa2l1 0.0624 0.0333 0.0006 0.0996 0.0015
# Klra10 1.0200 1.6680 5.9520 0.0120 0.0480
# Dcn 0.0000 0.0000 0.0000 0.0000 0.0000
# Cxcr2 0.1452 0.3938 0.0033 0.0033 1.0769
这应该很快