R从矩阵中提取具有不同值的行
R extracting rows with different values from matrices
我有两个结构相同的矩阵 m1 和 m2,现在我需要创建 2 个新矩阵
1)第一个(称为 Partenza)具有来自 m1 的所有与 m2 不同的行
2)第二个(称为 Arrivo)具有来自 m2 的对应的那些行
m1
row_num datoA datoB
1 p f
3 h b
5 c m
6 c r
9 m f
14 a b
m2
row_num datoA datoB
1 p f
3 h b
5 c g
6 a r
9 m f
14 x j
我的结果应该是:
Partenza (taken from m1)
row_num datoA datoB
5 c m
6 c r
14 a b
Arrivo (taken from m2)
row_num datoA datoB
5 c g
6 a r
14 x j
我试过了
# NOTE(review): base setdiff() compares its arguments as plain vectors of
# values, not row by row, so it is not expected to return the differing rows.
zzz <- setdiff(m1,m2)
# NOTE(review): the tables above show the column as "row_num"; names are
# case-sensitive, so "ROW_NUM" looks like a mismatch -- confirm.
partenza<-m1[m1[,"ROW_NUM"] %in% zzz,]
# NOTE(review): this subsets m1 again, although Arrivo is described as being
# taken from m2 -- confirm.
arrivo<- m1[m1[,"ROW_NUM"] %in% zzz,]
但结果显示 zzz 总是空的(我认为它不应该是空的!)
使用 dplyr 包中的 anti_join
函数:
# Load dplyr, which provides anti_join(), arrange() and the %>% pipe.
# (package() is not an R function; library() is the call that attaches it.)
library(dplyr)

# Example input: two data frames with the same columns and row_num keys.
m1 <- data.frame(
  row_num = c(1, 3, 5, 6, 9, 14),
  datoA = c("p", "h", "c", "c", "m", "a"),
  datoB = c("f", "b", "m", "r", "f", "b")
)
m2 <- data.frame(
  row_num = c(1, 3, 5, 6, 9, 14),
  datoA = c("p", "h", "c", "a", "m", "x"),
  datoB = c("f", "b", "g", "r", "f", "j")
)

# anti_join(x, y) keeps the rows of x that have no match in y. No `by` is
# given, so all columns shared by both data frames are used as the key.
# Partenza: rows of m1 absent from m2; Arrivo: rows of m2 absent from m1.
Partenza <- anti_join(m1, m2) %>% arrange(row_num)
Arrivo <- anti_join(m2, m1) %>% arrange(row_num)
在base
R(不需要包)中,你可以这样做:
# Flag every row position where m1 and m2 differ in at least one column.
# Rows are compared by position, so both inputs must list rows in the same
# order. seq_len() gives an empty index for zero-row input (1:nrow(m1) would
# iterate over c(1, 0)), and vapply() guarantees a logical vector result.
diffrow <- vapply(
  seq_len(nrow(m1)),
  function(i) !all(m1[i, ] == m2[i, ]),
  logical(1)
)
# Keep the differing rows from each source.
partenza <- m1[diffrow, ]
arrivo <- m2[diffrow, ]
另一种选择,使用 dplyr
包中的 setdiff
函数:
# Attach dplyr so that setdiff() dispatches to its data-frame-aware version.
library(dplyr)
# Rows present in m1 but not in m2.
partenza <- setdiff(x = m1, y = m2)
# Rows present in m2 but not in m1.
arrivo <- setdiff(x = m2, y = m1)
在这两种情况下,您将得到:
> partenza
# row_num datoA datoB
#3 5 c m
#4 6 c r
#6 14 a b
> arrivo
# row_num datoA datoB
#3 5 c g
#4 6 a r
#6 14 x j
数据:
# Example data: two data frames sharing the same integer row_num keys and two
# character columns. stringsAsFactors = FALSE keeps datoA/datoB as character
# vectors regardless of the R version's default.
m1 <- data.frame(
  row_num = c(1L, 3L, 5L, 6L, 9L, 14L),
  datoA = c("p", "h", "c", "c", "m", "a"),
  datoB = c("f", "b", "m", "r", "f", "b"),
  stringsAsFactors = FALSE
)
m2 <- data.frame(
  row_num = c(1L, 3L, 5L, 6L, 9L, 14L),
  datoA = c("p", "h", "c", "a", "m", "x"),
  datoB = c("f", "b", "g", "r", "f", "j"),
  stringsAsFactors = FALSE
)
我有两个结构相同的矩阵 m1 和 m2,现在我需要创建 2 个新矩阵
1)第一个(称为 Partenza)具有来自 m1 的所有与 m2 不同的行
2)第二个(称为 Arrivo)具有来自 m2 的对应的那些行
m1
row_num datoA datoB
1 p f
3 h b
5 c m
6 c r
9 m f
14 a b
m2
row_num datoA datoB
1 p f
3 h b
5 c g
6 a r
9 m f
14 x j
我的结果应该是:
Partenza (taken from m1)
row_num datoA datoB
5 c m
6 c r
14 a b
Arrivo (taken from m2)
row_num datoA datoB
5 c g
6 a r
14 x j
我试过了
# NOTE(review): base setdiff() compares its arguments as plain vectors of
# values, not row by row, so it is not expected to return the differing rows.
zzz <- setdiff(m1,m2)
# NOTE(review): the tables above show the column as "row_num"; names are
# case-sensitive, so "ROW_NUM" looks like a mismatch -- confirm.
partenza<-m1[m1[,"ROW_NUM"] %in% zzz,]
# NOTE(review): this subsets m1 again, although Arrivo is described as being
# taken from m2 -- confirm.
arrivo<- m1[m1[,"ROW_NUM"] %in% zzz,]
但结果显示 zzz 总是空的(我认为它不应该是空的!)
使用 dplyr 包中的 anti_join
函数:
# Load dplyr, which provides anti_join(), arrange() and the %>% pipe.
# (package() is not an R function; library() is the call that attaches it.)
library(dplyr)

# Example input: two data frames with the same columns and row_num keys.
m1 <- data.frame(
  row_num = c(1, 3, 5, 6, 9, 14),
  datoA = c("p", "h", "c", "c", "m", "a"),
  datoB = c("f", "b", "m", "r", "f", "b")
)
m2 <- data.frame(
  row_num = c(1, 3, 5, 6, 9, 14),
  datoA = c("p", "h", "c", "a", "m", "x"),
  datoB = c("f", "b", "g", "r", "f", "j")
)

# anti_join(x, y) keeps the rows of x that have no match in y. No `by` is
# given, so all columns shared by both data frames are used as the key.
# Partenza: rows of m1 absent from m2; Arrivo: rows of m2 absent from m1.
Partenza <- anti_join(m1, m2) %>% arrange(row_num)
Arrivo <- anti_join(m2, m1) %>% arrange(row_num)
在base
R(不需要包)中,你可以这样做:
# Flag every row position where m1 and m2 differ in at least one column.
# Rows are compared by position, so both inputs must list rows in the same
# order. seq_len() gives an empty index for zero-row input (1:nrow(m1) would
# iterate over c(1, 0)), and vapply() guarantees a logical vector result.
diffrow <- vapply(
  seq_len(nrow(m1)),
  function(i) !all(m1[i, ] == m2[i, ]),
  logical(1)
)
# Keep the differing rows from each source.
partenza <- m1[diffrow, ]
arrivo <- m2[diffrow, ]
另一种选择,使用 dplyr
包中的 setdiff
函数:
# Attach dplyr so that setdiff() dispatches to its data-frame-aware version.
library(dplyr)
# Rows present in m1 but not in m2.
partenza <- setdiff(x = m1, y = m2)
# Rows present in m2 but not in m1.
arrivo <- setdiff(x = m2, y = m1)
在这两种情况下,您将得到:
> partenza
# row_num datoA datoB
#3 5 c m
#4 6 c r
#6 14 a b
> arrivo
# row_num datoA datoB
#3 5 c g
#4 6 a r
#6 14 x j
数据:
# Example data: two data frames sharing the same integer row_num keys and two
# character columns. stringsAsFactors = FALSE keeps datoA/datoB as character
# vectors regardless of the R version's default.
m1 <- data.frame(
  row_num = c(1L, 3L, 5L, 6L, 9L, 14L),
  datoA = c("p", "h", "c", "c", "m", "a"),
  datoB = c("f", "b", "m", "r", "f", "b"),
  stringsAsFactors = FALSE
)
m2 <- data.frame(
  row_num = c(1L, 3L, 5L, 6L, 9L, 14L),
  datoA = c("p", "h", "c", "a", "m", "x"),
  datoB = c("f", "b", "g", "r", "f", "j"),
  stringsAsFactors = FALSE
)