比较矩阵以找出差异
Compare matrices to find the differences
我有 2 个矩阵,我想按行名(row.names)逐行比较它们,以找出差异。
> head(N1)
Total_Degree Transitivity Betweenness Closeness_All
2410016O06RIK 1 NaN 0.00000 0.0003124024
AGO1 4 0.1666667 37.00000 0.0003133814
APEX1 4 0.6666667 4.00000 0.0003144654
ATR 4 0.1666667 19.50000 0.0003128911
CASP3 24 0.0000000 806.00000 0.0002980626
CCND2 4 0.3333333 97.33333 0.0003132832
head(N2)
Total_Degree Transitivity Betweenness Closeness_All
2410016O06RIK 1 NaN 0.0 2.279982e-04
ADI1 1 NaN 0.0 1.728877e-05
AGO1 3 0.0000000 40.0 2.284670e-04
AIRN 1 NaN 0.0 1.721733e-05
APEX1 3 0.6666667 2.0 2.288330e-04
ATR 3 0.3333333 19.5 2.281542e-04
N1 中的许多行名(row.names)也存在于 N2 中,我想比较这些行,并把差异写入一个新矩阵。对于只出现在 N1 或只出现在 N2 中的行,应标明它们属于 N1 还是 N2。
我不确定哪个是计算差异的最佳标准,我能想到的是将 N1 中一行的所有值简单相加,然后从 N2 中相应行的相加值中减去该值。
例如输出应该是:
> head(Compared)
Comparison Unique
2410016O06RIK 0.0002 Common
AGO1 -1.83 Common
APEX1 2.24 Common
ATR 0.0034 Common
CASP3 830.00029 N1
ADI1 1.0007288 N2
此处对于 row.name = `2410016O06RIK`,先分别把 N1 和 N2 中该行的所有值相加,然后把 N1-N2 的差写入 `Comparison` 列;由于这一行在两个矩阵中都存在,所以在 `Unique` 列中写入 `Common`。
一种使用基础 R 的方法,借助 `rowSums` 和 `merge`:
如果 `N1` 和 `N2` 是 data.frames:
# Compare two data.frames row-name-wise through their row sums.
#   Inputs : N1, N2 - data.frames whose columns are all numeric; the row names
#            are the keys used for matching.
#   Output : comp   - data.frame with columns Row.names, Comparison, Unique.
#
# The row sums are kept in separate one-column data.frames instead of being
# written back into N1/N2: this leaves the inputs untouched, so re-running the
# snippet does not fold a previously added `rs` column into the new sums.
sums_n1 <- data.frame(rs = rowSums(N1, na.rm = TRUE))
sums_n2 <- data.frame(rs = rowSums(N2, na.rm = TRUE))

# Full outer join on the row names; the sums arrive as rs.x (N1) and rs.y (N2),
# with NA marking a row name that is absent from that side.
comp <- merge(sums_n1, sums_n2, by = "row.names", all = TRUE)

# Presence flags -> index into the label vector:
#   1 = only in N1, 2 = only in N2, 3 = present in both.
in_n1 <- !is.na(comp$rs.x)
in_n2 <- !is.na(comp$rs.y)
comp$Unique <- c("N1", "N2", "common")[in_n1 + 2 * in_n2]

# N1 - N2 difference of the row sums; NA for rows found in only one input.
comp$Comparison <- comp$rs.x - comp$rs.y

# Keep only the needed columns, selected by name rather than by position.
comp <- comp[, c("Row.names", "Comparison", "Unique")]
如果 `N1` 和 `N2` 是矩阵:
# Compare two numeric matrices row-name-wise through their row sums.
#   Inputs : N1, N2 - numeric matrices; the row names are the matching keys.
#   Output : comp   - data.frame with columns Row.names, Comparison, Unique.
#
# Membership is derived from the merged row sums rather than from one specific
# data column: with na.rm = TRUE a row sum is never NA for a row that exists,
# so an NA after the merge can only mean "row name absent from that matrix".
# A single merge also guarantees that Unique and Comparison are aligned.
rs1 <- rowSums(N1, na.rm = TRUE)
rs2 <- rowSums(N2, na.rm = TRUE)

# Full outer join of the two named sum vectors on their names.
comp <- merge(as.data.frame(rs1), as.data.frame(rs2),
              by = "row.names", all = TRUE)

# Presence flags -> index into the label vector:
#   1 = only in N1, 2 = only in N2, 3 = present in both.
in_n1 <- !is.na(comp$rs1)
in_n2 <- !is.na(comp$rs2)
comp$Unique <- c("N1", "N2", "common")[in_n1 + 2 * in_n2]

# N1 - N2 difference of the row sums; NA for rows found in only one matrix.
comp$Comparison <- comp$rs1 - comp$rs2

# Keep only the needed columns.
comp <- comp[, c("Row.names", "Comparison", "Unique")]
两种方法的输出:
comp
# Row.names Comparison Unique
#1 2410016O06RIK 0.0000844042 common
#2 ADI1 NA N2
#3 AGO1 -1.8332483856 common
#4 AIRN NA N2
#5 APEX1 3.0000856324 common
#6 ATR 0.8334181369 common
#7 CASP3 NA N1
#8 CCND2 NA N1
这是解决方案的一部分:`res` 是一个 data.table,可以用它来完成计算差异的部分:
# Load the packages with library() so that a missing package stops the script
# immediately; require() would only warn and return FALSE.
library(data.table)
library(dplyr)  # used below for the %>% pipe

# Reproducible toy data: two keyed tables that share the keys "c" and "d".
set.seed(2016)
dt1 <- data.table(V1 = c("a", "b", "c", "d"), V2 = rnorm(4))
dt2 <- data.table(V1 = c("c", "d", "e", "f"), V2 = rnorm(4))

# Earlier three-way variant, kept for reference (not run). It is written with
# `%in%` because `%nin%` does not appear to be provided by the two packages
# loaded above.
# common  <- merge(dt1, dt2, by = "V1")[, Unique := "Common"]
# unique1 <- dt1[!(V1 %in% dt2[, V1]), ][, Unique := "N1"]
# unique2 <- dt2[!(V1 %in% dt1[, V1]), ][, Unique := "N2"]
# res <- rbind(common, unique1, unique2, fill = TRUE)
@Cath 回答后的小更新,只是为了清楚起见。
# Full outer join of dt1 and dt2 on V1; the value columns become V2.x / V2.y.
allMerged <- merge(dt1, dt2, by = "V1", all = TRUE)

# Positions of the value columns (every column whose name contains "V2").
value_cols <- grep("V2", names(allMerged))

# RowSum: per-row sum of the value columns, ignoring the NA of a missing side.
allMerged[, RowSum := rowSums(.SD, na.rm = TRUE), .SDcols = value_cols]

# Unique: numeric origin code built from the NA pattern -
#   0 = key present in both tables,
#   1 = V2.x is NA (key only in dt2),
#   2 = V2.y is NA (key only in dt1).
allMerged[, Unique := ((is.na(V2.x) + 2*is.na(V2.y)))]

print(allMerged)
我有 2 个矩阵,我想按行名(row.names)逐行比较它们,以找出差异。
> head(N1)
Total_Degree Transitivity Betweenness Closeness_All
2410016O06RIK 1 NaN 0.00000 0.0003124024
AGO1 4 0.1666667 37.00000 0.0003133814
APEX1 4 0.6666667 4.00000 0.0003144654
ATR 4 0.1666667 19.50000 0.0003128911
CASP3 24 0.0000000 806.00000 0.0002980626
CCND2 4 0.3333333 97.33333 0.0003132832
head(N2)
Total_Degree Transitivity Betweenness Closeness_All
2410016O06RIK 1 NaN 0.0 2.279982e-04
ADI1 1 NaN 0.0 1.728877e-05
AGO1 3 0.0000000 40.0 2.284670e-04
AIRN 1 NaN 0.0 1.721733e-05
APEX1 3 0.6666667 2.0 2.288330e-04
ATR 3 0.3333333 19.5 2.281542e-04
N1 中的许多行名(row.names)也存在于 N2 中,我想比较这些行,并把差异写入一个新矩阵。对于只出现在 N1 或只出现在 N2 中的行,应标明它们属于 N1 还是 N2。
我不确定哪个是计算差异的最佳标准,我能想到的是将 N1 中一行的所有值简单相加,然后从 N2 中相应行的相加值中减去该值。
例如输出应该是:
> head(Compared)
Comparison Unique
2410016O06RIK 0.0002 Common
AGO1 -1.83 Common
APEX1 2.24 Common
ATR 0.0034 Common
CASP3 830.00029 N1
ADI1 1.0007288 N2
此处对于 row.name = `2410016O06RIK`,先分别把 N1 和 N2 中该行的所有值相加,然后把 N1-N2 的差写入 `Comparison` 列;由于这一行在两个矩阵中都存在,所以在 `Unique` 列中写入 `Common`。
一种使用基础 R 的方法,借助 `rowSums` 和 `merge`:
如果 `N1` 和 `N2` 是 data.frames:
# Compare two data.frames row-name-wise through their row sums.
#   Inputs : N1, N2 - data.frames whose columns are all numeric; the row names
#            are the keys used for matching.
#   Output : comp   - data.frame with columns Row.names, Comparison, Unique.
#
# The row sums are kept in separate one-column data.frames instead of being
# written back into N1/N2: this leaves the inputs untouched, so re-running the
# snippet does not fold a previously added `rs` column into the new sums.
sums_n1 <- data.frame(rs = rowSums(N1, na.rm = TRUE))
sums_n2 <- data.frame(rs = rowSums(N2, na.rm = TRUE))

# Full outer join on the row names; the sums arrive as rs.x (N1) and rs.y (N2),
# with NA marking a row name that is absent from that side.
comp <- merge(sums_n1, sums_n2, by = "row.names", all = TRUE)

# Presence flags -> index into the label vector:
#   1 = only in N1, 2 = only in N2, 3 = present in both.
in_n1 <- !is.na(comp$rs.x)
in_n2 <- !is.na(comp$rs.y)
comp$Unique <- c("N1", "N2", "common")[in_n1 + 2 * in_n2]

# N1 - N2 difference of the row sums; NA for rows found in only one input.
comp$Comparison <- comp$rs.x - comp$rs.y

# Keep only the needed columns, selected by name rather than by position.
comp <- comp[, c("Row.names", "Comparison", "Unique")]
如果 `N1` 和 `N2` 是矩阵:
# Compare two numeric matrices row-name-wise through their row sums.
#   Inputs : N1, N2 - numeric matrices; the row names are the matching keys.
#   Output : comp   - data.frame with columns Row.names, Comparison, Unique.
#
# Membership is derived from the merged row sums rather than from one specific
# data column: with na.rm = TRUE a row sum is never NA for a row that exists,
# so an NA after the merge can only mean "row name absent from that matrix".
# A single merge also guarantees that Unique and Comparison are aligned.
rs1 <- rowSums(N1, na.rm = TRUE)
rs2 <- rowSums(N2, na.rm = TRUE)

# Full outer join of the two named sum vectors on their names.
comp <- merge(as.data.frame(rs1), as.data.frame(rs2),
              by = "row.names", all = TRUE)

# Presence flags -> index into the label vector:
#   1 = only in N1, 2 = only in N2, 3 = present in both.
in_n1 <- !is.na(comp$rs1)
in_n2 <- !is.na(comp$rs2)
comp$Unique <- c("N1", "N2", "common")[in_n1 + 2 * in_n2]

# N1 - N2 difference of the row sums; NA for rows found in only one matrix.
comp$Comparison <- comp$rs1 - comp$rs2

# Keep only the needed columns.
comp <- comp[, c("Row.names", "Comparison", "Unique")]
两种方法的输出:
comp
# Row.names Comparison Unique
#1 2410016O06RIK 0.0000844042 common
#2 ADI1 NA N2
#3 AGO1 -1.8332483856 common
#4 AIRN NA N2
#5 APEX1 3.0000856324 common
#6 ATR 0.8334181369 common
#7 CASP3 NA N1
#8 CCND2 NA N1
这是解决方案的一部分:`res` 是一个 data.table,可以用它来完成计算差异的部分:
# Load the packages with library() so that a missing package stops the script
# immediately; require() would only warn and return FALSE.
library(data.table)
library(dplyr)  # used below for the %>% pipe

# Reproducible toy data: two keyed tables that share the keys "c" and "d".
set.seed(2016)
dt1 <- data.table(V1 = c("a", "b", "c", "d"), V2 = rnorm(4))
dt2 <- data.table(V1 = c("c", "d", "e", "f"), V2 = rnorm(4))

# Earlier three-way variant, kept for reference (not run). It is written with
# `%in%` because `%nin%` does not appear to be provided by the two packages
# loaded above.
# common  <- merge(dt1, dt2, by = "V1")[, Unique := "Common"]
# unique1 <- dt1[!(V1 %in% dt2[, V1]), ][, Unique := "N1"]
# unique2 <- dt2[!(V1 %in% dt1[, V1]), ][, Unique := "N2"]
# res <- rbind(common, unique1, unique2, fill = TRUE)
@Cath 回答后的小更新,只是为了清楚起见。
# Full outer join of dt1 and dt2 on V1; the value columns become V2.x / V2.y.
allMerged <- merge(dt1, dt2, by = "V1", all = TRUE)

# Positions of the value columns (every column whose name contains "V2").
value_cols <- grep("V2", names(allMerged))

# RowSum: per-row sum of the value columns, ignoring the NA of a missing side.
allMerged[, RowSum := rowSums(.SD, na.rm = TRUE), .SDcols = value_cols]

# Unique: numeric origin code built from the NA pattern -
#   0 = key present in both tables,
#   1 = V2.x is NA (key only in dt2),
#   2 = V2.y is NA (key only in dt1).
allMerged[, Unique := ((is.na(V2.x) + 2*is.na(V2.y)))]

print(allMerged)