R:以某种方式识别两个数据框中的值
R: identification of values in two dataframe in a certain way
我有两个看起来几乎相同的数据框,我想以某种方式识别 Data_1 中不在 Data_2 中的值。
我有两个较大的数据集,如下所示:
数据框 1:
# First data frame: one row per animal with its season label and room number.
Animal <- c(
  "bird", "Blue Catfish", "Cat", "Buffalo", "Lion", "Monkey", "Horse",
  "Butterfly", "Ant", "elephant", "Snake", "Chameloen", "Cow"
)
season <- c(
  "S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3", "S1", "S3"
)
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444, 222, 111)
Data_1 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)
> Data_1
Animal season ROOM
1 bird S1 111
2 Blue Catfish S1 222
3 Cat S2 444
4 Buffalo S2 222
5 Lion S3 111
6 Monkey S4 444
7 Horse S4 222
8 Butterfly S15 111
9 Ant S3 222
10 elephant S2 111
11 Snake S3 444
12 Chameloen S1 222
13 Cow S3 111
数据框 2:
# Second data frame: same columns as Data_1, with some animals swapped out.
Animal <- c(
  "bird", "Mouse", "Cat", "Zebra", "Lion", "Monkey", "Horse",
  "Leopard", "Ant", "elephant", "Bison"
)
season <- c("S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3")
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444)
Data_2 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)
> Data_2
Animal season ROOM
1 bird S1 111
2 Mouse S1 222
3 Cat S2 444
4 Zebra S2 222
5 Lion S3 111
6 Monkey S4 444
7 Horse S4 222
8 Leopard S15 111
9 Ant S3 222
10 elephant S2 111
11 Bison S3 444
我想比较这两个数据框,找出 Data_1 中有而 Data_2 中没有的动物名称。比较应按每个季节(season)、每个房间(ROOM)分别进行。例如,两个数据框在 S2 季、222 房间的动物不一致,此时应返回该动物的名称。
关于如何做到这一点有什么建议吗?
您也可以使用 left_join() 来检查。
# Rebuild the two example data frames from the question.
# Data_1: 13 animals with their season label and room number.
Animal <- c(
  "bird", "Blue Catfish", "Cat", "Buffalo", "Lion", "Monkey", "Horse",
  "Butterfly", "Ant", "elephant", "Snake", "Chameloen", "Cow"
)
season <- c(
  "S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3", "S1", "S3"
)
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444, 222, 111)
Data_1 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)

# Data_2: 11 animals; the helper vectors are reused (overwritten) on purpose.
Animal <- c(
  "bird", "Mouse", "Cat", "Zebra", "Lion", "Monkey", "Horse",
  "Leopard", "Ant", "elephant", "Bison"
)
season <- c("S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3")
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444)
Data_2 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)
# dplyr provides %>%, left_join() and filter().
library(dplyr)

# Pair every Data_1 row with the Data_2 row that shares its season and room,
# then keep the rows whose animal differs between the two data frames.
# A Data_1 row with no season/room partner in Data_2 comes back with
# Animal_2 = NA; `Animal_1 != Animal_2` is then NA and filter() would silently
# drop the row, so those rows are kept explicitly with is.na().
Data_1 %>%
  left_join(Data_2, by = c("season", "ROOM"), suffix = c("_1", "_2")) %>%
  filter(is.na(Animal_2) | Animal_1 != Animal_2)
输出
Animal_1 season ROOM Animal_2
1 Blue Catfish S1 222 Mouse
2 Buffalo S2 222 Zebra
3 Butterfly S15 111 Leopard
4 Snake S3 444 Bison
5 Chameloen S1 222 Mouse
6 Cow S3 111 Lion
我们可以使用 anti_join
library(dplyr)

# Keep the Data_1 rows that have no exact counterpart in Data_2.
# ROOM is part of the key because the comparison is per season AND per room:
# without it, an animal found in the same season but a different room of
# Data_2 would be treated as present and wrongly dropped from the result.
anti_join(Data_1, Data_2, by = c("Animal", "season", "ROOM"))
Animal season ROOM
1 Blue Catfish S1 222
2 Buffalo S2 222
3 Butterfly S15 111
4 Snake S3 444
5 Chameloen S1 222
6 Cow S3 111
使用 data.table:
library(data.table)

# Convert both data frames to data.tables by reference, then key (and thereby
# sort) each of them on the same three columns.
key_cols <- c("season", "ROOM", "Animal")
setDT(Data_1)
setDT(Data_2)
setkeyv(Data_1, key_cols)
setkeyv(Data_2, key_cols)

# Keyed anti-join: the rows of Data_1 whose key has no match in Data_2.
Data_1[!Data_2]
## Animal season ROOM
## 1: Blue Catfish S1 222
## 2: Chameloen S1 222
## 3: Butterfly S15 111
## 4: Buffalo S2 222
## 5: Cow S3 111
## 6: Snake S3 444
我有两个看起来几乎相同的数据框,我想以某种方式识别 Data_1 中不在 Data_2 中的值。我有两个较大的数据集,如下所示:
数据框 1:
# First data frame: one row per animal with its season label and room number.
Animal <- c(
  "bird", "Blue Catfish", "Cat", "Buffalo", "Lion", "Monkey", "Horse",
  "Butterfly", "Ant", "elephant", "Snake", "Chameloen", "Cow"
)
season <- c(
  "S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3", "S1", "S3"
)
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444, 222, 111)
Data_1 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)
> Data_1
Animal season ROOM
1 bird S1 111
2 Blue Catfish S1 222
3 Cat S2 444
4 Buffalo S2 222
5 Lion S3 111
6 Monkey S4 444
7 Horse S4 222
8 Butterfly S15 111
9 Ant S3 222
10 elephant S2 111
11 Snake S3 444
12 Chameloen S1 222
13 Cow S3 111
数据框 2:
# Second data frame: same columns as Data_1, with some animals swapped out.
Animal <- c(
  "bird", "Mouse", "Cat", "Zebra", "Lion", "Monkey", "Horse",
  "Leopard", "Ant", "elephant", "Bison"
)
season <- c("S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3")
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444)
Data_2 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)
> Data_2
Animal season ROOM
1 bird S1 111
2 Mouse S1 222
3 Cat S2 444
4 Zebra S2 222
5 Lion S3 111
6 Monkey S4 444
7 Horse S4 222
8 Leopard S15 111
9 Ant S3 222
10 elephant S2 111
11 Bison S3 444
我想比较这两个数据框,找出 Data_1 中有而 Data_2 中没有的动物名称。比较应按每个季节(season)、每个房间(ROOM)分别进行。例如,两个数据框在 S2 季、222 房间的动物不一致,此时应返回该动物的名称。关于如何做到这一点有什么建议吗?
您也可以使用 left_join() 来检查。
# Rebuild the two example data frames from the question.
# Data_1: 13 animals with their season label and room number.
Animal <- c(
  "bird", "Blue Catfish", "Cat", "Buffalo", "Lion", "Monkey", "Horse",
  "Butterfly", "Ant", "elephant", "Snake", "Chameloen", "Cow"
)
season <- c(
  "S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3", "S1", "S3"
)
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444, 222, 111)
Data_1 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)

# Data_2: 11 animals; the helper vectors are reused (overwritten) on purpose.
Animal <- c(
  "bird", "Mouse", "Cat", "Zebra", "Lion", "Monkey", "Horse",
  "Leopard", "Ant", "elephant", "Bison"
)
season <- c("S1", "S1", "S2", "S2", "S3", "S4", "S4", "S15", "S3", "S2", "S3")
ROOM <- c(111, 222, 444, 222, 111, 444, 222, 111, 222, 111, 444)
Data_2 <- data.frame(Animal = Animal, season = season, ROOM = ROOM)
# dplyr provides %>%, left_join() and filter().
library(dplyr)

# Pair every Data_1 row with the Data_2 row that shares its season and room,
# then keep the rows whose animal differs between the two data frames.
# A Data_1 row with no season/room partner in Data_2 comes back with
# Animal_2 = NA; `Animal_1 != Animal_2` is then NA and filter() would silently
# drop the row, so those rows are kept explicitly with is.na().
Data_1 %>%
  left_join(Data_2, by = c("season", "ROOM"), suffix = c("_1", "_2")) %>%
  filter(is.na(Animal_2) | Animal_1 != Animal_2)
输出
Animal_1 season ROOM Animal_2
1 Blue Catfish S1 222 Mouse
2 Buffalo S2 222 Zebra
3 Butterfly S15 111 Leopard
4 Snake S3 444 Bison
5 Chameloen S1 222 Mouse
6 Cow S3 111 Lion
我们可以使用 anti_join
library(dplyr)

# Keep the Data_1 rows that have no exact counterpart in Data_2.
# ROOM is part of the key because the comparison is per season AND per room:
# without it, an animal found in the same season but a different room of
# Data_2 would be treated as present and wrongly dropped from the result.
anti_join(Data_1, Data_2, by = c("Animal", "season", "ROOM"))
Animal season ROOM
1 Blue Catfish S1 222
2 Buffalo S2 222
3 Butterfly S15 111
4 Snake S3 444
5 Chameloen S1 222
6 Cow S3 111
使用 data.table:
library(data.table)

# Convert both data frames to data.tables by reference, then key (and thereby
# sort) each of them on the same three columns.
key_cols <- c("season", "ROOM", "Animal")
setDT(Data_1)
setDT(Data_2)
setkeyv(Data_1, key_cols)
setkeyv(Data_2, key_cols)

# Keyed anti-join: the rows of Data_1 whose key has no match in Data_2.
Data_1[!Data_2]
## Animal season ROOM
## 1: Blue Catfish S1 222
## 2: Chameloen S1 222
## 3: Butterfly S15 111
## 4: Buffalo S2 222
## 5: Cow S3 111
## 6: Snake S3 444