检查一个数据框的给定 id 在另一个数据框中是否有特定值
Checking whether there is a certain value for a given id of one data frame in another
我有两个 data.frames "a" 和 "b".
str(a)
'data.frame': 1597 obs. of 2 variables:
$ id : int ...
$ age: num ...
> str(b)
'data.frame': 12877 obs. of 2 variables:
$ id : int ...
$ code : chr ...
虽然 "id" 在 "a" 中是唯一的,但在 "b" 中不是。更准确地说,"a" 和 "b" 之间存在 1:n 关系。我想检查对于 "a" 中的每个 id(即 "a$id"),"b" 中是否存在某个特定的代码。我该怎么做?
我想,我需要这样的东西:
a$code.I25 <- ifelse(<if there is a$id in b$id and for b$id an entry with "I25" for b$code>, 1, 0)
不幸的是,情况还要更复杂一些。"b$code" 的值不仅有 "I25" 这样的形式,还有 "I25.11" 或 "I25.12" 这样的形式。但是,我只想按 "I25" 进行比较,并希望 "I25.11" 和 "I25.12" 也都被判定为真。这可能吗?
这是一个例子
# Example data: ids are unique in `a` but may repeat in `b` (1:n relation).
id_a <- c(1, 2, 3, 23, 19, 11:13, 4, 6)
id_b <- c(1, 2, 2, 5, 8, 11:13, 3, 3)
code_b <- c(rep("I25", 4), rep("I26", 5), "I25")
a <- data.frame(id = id_a, stringsAsFactors = FALSE)
a
# id
# 1 1
# 2 2
# 3 3
# 4 23
# 5 19
# 6 11
# 7 12
# 8 13
# 9 4
# 10 6
b <- data.frame(id = id_b, code = code_b, stringsAsFactors = FALSE)
b
# id code
# 1 1 I25
# 2 2 I25
# 3 2 I25
# 4 5 I25
# 5 8 I26
# 6 11 I26
# 7 12 I26
# 8 13 I26
# 9 3 I26
# 10 3 I25
# Logical mask over the rows of `df`: TRUE where the row's id occurs in `ids`
# and its code equals one of `codes`. `%in%` compares whole strings, so "I25"
# does not match "I25.11"; it also never returns NA.
has_code <- function(df, ids, codes) {
  df$id %in% ids & df$code %in% codes
}
# Positions of the rows of b whose id also occurs in a.
index <- which(b$id %in% a$id)
b[has_code(b, a$id, "I25"), ]
# id code
# 1 1 I25
# 2 2 I25
# 3 2 I25
# 10 3 I25
b[has_code(b, a$id, c("I25", "I26")), ]
# id code
# 1 1 I25
# 2 2 I25
# 3 2 I25
# 6 11 I26
# 7 12 I26
# 8 13 I26
# 9 3 I26
# 10 3 I25
# TRUE | FALSE
# Derive the flag from the mask in one step. Assigning 0 through a negative
# index, as in `x[-which(...)] <- 0`, selects nothing when there is no match
# at all and would leave every TF as NA instead of 0.
b$TF <- as.numeric(has_code(b, a$id, c("I25", "I26")))
b
# id code TF
# 1 1 I25 1
# 2 2 I25 1
# 3 2 I25 1
# 4 5 I25 0
# 5 8 I26 0
# 6 11 I26 1
# 7 12 I26 1
# 8 13 I26 1
# 9 3 I26 1
# 10 3 I25 1
# Create a dummy data.frame for a.
# NOTE(review): the ages are drawn before set.seed() below, so they differ
# between runs; only the resampling of foo.b is seeded.
foo.a <- data.frame(id = 1:20, age = rnorm(20, 25))
foo.b <- data.frame(
  id = 1:40,
  # The 20 pasted codes are recycled to fill the 40 rows.
  code = paste(c("I25", "I27"), 1:20, sep = "."),
  stringsAsFactors = FALSE
)
# Replicate it randomly.
set.seed(357)
foo.b <- foo.b[sample(nrow(foo.b), 75, replace = TRUE), ]
# Check for matches: positions of the rows of foo.b whose id occurs in foo.a.
id.match <- which(foo.b$id %in% foo.a$id)
# Get matching rows. grep() returns positions within the subset
# foo.b$code[id.match], so they are mapped back through id.match before
# indexing foo.b; indexing foo.b with them directly picks unrelated rows.
# The pattern accepts "I25" itself and sub-codes such as "I25.11".
foo.match <- foo.b[id.match[grep("^I25(\\.|$)", foo.b$code[id.match])], ]
foo.match
我有两个 data.frames "a" 和 "b".
str(a)
'data.frame': 1597 obs. of 2 variables:
$ id : int ...
$ age: num ...
> str(b)
'data.frame': 12877 obs. of 2 variables:
$ id : int ...
$ code : chr ...
虽然 "id" 在 "a" 中是唯一的,但在 "b" 中不是。更准确地说,"a" 和 "b" 之间存在 1:n 关系。我想检查对于 "a" 中的每个 id(即 "a$id"),"b" 中是否存在某个特定的代码。我该怎么做?
我想,我需要这样的东西:
a$code.I25 <- ifelse(<if there is a$id in b$id and for b$id an entry with "I25" for b$code>, 1, 0)
不幸的是,情况还要更复杂一些。"b$code" 的值不仅有 "I25" 这样的形式,还有 "I25.11" 或 "I25.12" 这样的形式。但是,我只想按 "I25" 进行比较,并希望 "I25.11" 和 "I25.12" 也都被判定为真。这可能吗?
这是一个例子
# Example data: ids are unique in `a` but may repeat in `b` (1:n relation).
id_a <- c(1, 2, 3, 23, 19, 11:13, 4, 6)
id_b <- c(1, 2, 2, 5, 8, 11:13, 3, 3)
code_b <- c(rep("I25", 4), rep("I26", 5), "I25")
a <- data.frame(id = id_a, stringsAsFactors = FALSE)
a
# id
# 1 1
# 2 2
# 3 3
# 4 23
# 5 19
# 6 11
# 7 12
# 8 13
# 9 4
# 10 6
b <- data.frame(id = id_b, code = code_b, stringsAsFactors = FALSE)
b
# id code
# 1 1 I25
# 2 2 I25
# 3 2 I25
# 4 5 I25
# 5 8 I26
# 6 11 I26
# 7 12 I26
# 8 13 I26
# 9 3 I26
# 10 3 I25
# Logical mask over the rows of `df`: TRUE where the row's id occurs in `ids`
# and its code equals one of `codes`. `%in%` compares whole strings, so "I25"
# does not match "I25.11"; it also never returns NA.
has_code <- function(df, ids, codes) {
  df$id %in% ids & df$code %in% codes
}
# Positions of the rows of b whose id also occurs in a.
index <- which(b$id %in% a$id)
b[has_code(b, a$id, "I25"), ]
# id code
# 1 1 I25
# 2 2 I25
# 3 2 I25
# 10 3 I25
b[has_code(b, a$id, c("I25", "I26")), ]
# id code
# 1 1 I25
# 2 2 I25
# 3 2 I25
# 6 11 I26
# 7 12 I26
# 8 13 I26
# 9 3 I26
# 10 3 I25
# TRUE | FALSE
# Derive the flag from the mask in one step. Assigning 0 through a negative
# index, as in `x[-which(...)] <- 0`, selects nothing when there is no match
# at all and would leave every TF as NA instead of 0.
b$TF <- as.numeric(has_code(b, a$id, c("I25", "I26")))
b
# id code TF
# 1 1 I25 1
# 2 2 I25 1
# 3 2 I25 1
# 4 5 I25 0
# 5 8 I26 0
# 6 11 I26 1
# 7 12 I26 1
# 8 13 I26 1
# 9 3 I26 1
# 10 3 I25 1
# Create a dummy data.frame for a.
# NOTE(review): the ages are drawn before set.seed() below, so they differ
# between runs; only the resampling of foo.b is seeded.
foo.a <- data.frame(id = 1:20, age = rnorm(20, 25))
foo.b <- data.frame(
  id = 1:40,
  # The 20 pasted codes are recycled to fill the 40 rows.
  code = paste(c("I25", "I27"), 1:20, sep = "."),
  stringsAsFactors = FALSE
)
# Replicate it randomly.
set.seed(357)
foo.b <- foo.b[sample(nrow(foo.b), 75, replace = TRUE), ]
# Check for matches: positions of the rows of foo.b whose id occurs in foo.a.
id.match <- which(foo.b$id %in% foo.a$id)
# Get matching rows. grep() returns positions within the subset
# foo.b$code[id.match], so they are mapped back through id.match before
# indexing foo.b; indexing foo.b with them directly picks unrelated rows.
# The pattern accepts "I25" itself and sub-codes such as "I25.11".
foo.match <- foo.b[id.match[grep("^I25(\\.|$)", foo.b$code[id.match])], ]
foo.match