确定最终列中的值是否存在于各个行中
Determine if Value in Final Column exists in respective rows
我有一个数据框如下:
df1
ColA ColB ColC ColD ColE COlF ColG Recs
1 A-1 A - 3 B B NA C
1 B-1 C R D E NA B
1 NA A B A B
如何确定最后一列 Recs 中的值是否出现在其所在行的其他列中?
我在下面尝试过,但它不起作用,因为我的普通数据集中有重复项:
# Asker's attempt: labels a row "Yes" when ANY non-NA, non-empty value occurs
# more than once within the row (anyDuplicated() != 0), and "No" otherwise.
# The duplicate does not have to involve the Recs value.
df1$Exist <- apply(df1, 1, FUN = function(x)
c("No", "Yes")[(anyDuplicated(x[!is.na(x) & x != "" ])!=0) +1])
还有空格、NA 和包含空格和破折号的字符值。
最终输出应该是:
ColA ColB ColC ColD ColE COlF ColG Recs Exist?
1 A-1 A - 3 B B NA C No
1 B-1 C R D E NA B No
1 NA A B A B Yes
谢谢
# Row-wise membership test: for every row of df1, check whether the value in
# `Recs` also occurs in one of the other columns of that same row.
# The result is appended to df1 as the logical column `exist`.

# Compare against every column except Recs instead of a hard-coded 1:7 range.
other_cols <- setdiff(seq_along(df1), which(names(df1) == "Recs"))

# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(df1) would iterate over c(1, 0).
exist <- vapply(
  seq_len(nrow(df1)),
  function(i) {
    rec <- as.character(df1[["Recs"]][i])
    # Convert each cell explicitly so factor cells compare by label rather
    # than by their integer code.
    cells <- unlist(lapply(df1[i, other_cols, drop = FALSE], as.character))
    # A missing reference value is never reported as found.
    !is.na(rec) && rec %in% cells
  },
  logical(1)
)
df1 <- cbind(df1, exist)
如果我理解正确的话,这应该有效:
# Flag, for every row of df1, whether the value in `Recs` is also present in
# any other column of the same row, and store the answer as "Yes"/"No" in
# df1$Exist.

# Compute column index of reference variable
col_ind <- which(colnames(df1) == "Recs")
stopifnot(length(col_ind) == 1L)

# Reference values as character so numeric and factor columns are compared by
# their plain text value.
recs_chr <- as.character(df1[[col_ind]])

# Compute boolean vector of presence.
# The comparison is done column by column instead of apply(df1, 1, ...):
# apply() converts the data frame with as.matrix(), which format()s numeric
# columns to a common width (e.g. " 1"), so equal values could fail `==`.
present_bool <- Reduce(
  function(found, column) {
    hit <- as.character(column) == recs_chr
    # NA comparisons (missing cell or missing Recs) count as "no match".
    found | (!is.na(hit) & hit)
  },
  df1[-col_ind],
  rep(FALSE, nrow(df1))
)

# Create the desired column
df1$Exist <- ifelse(present_bool, "Yes", "No")
为了效率,你可以在这里使用data.table。
library(data.table)
# Convert df to a data.table by reference and add the logical column `Exist`.
# Each row forms its own group; within a group, unlist(.SD) flattens the cells
# of every column except Recs (.SDcols = -"Recs") into one vector, and %chin%
# tests whether the row's Recs value is among them.
# seq_len() keeps the grouping vector valid for a zero-row table, where
# 1:nrow(df) would evaluate to c(1, 0).
setDT(df)[, Exist := Recs %chin% unlist(.SD), .SDcols = -"Recs",
          by = seq_len(nrow(df))]
这给出了
ColA ColB ColC ColD ColE COlF ColG Recs Exist
1: 1 A-1 A-3 B B NA NA C FALSE
2: 1 B-1 C R D E NA B FALSE
3: 1 NA A B A NA B TRUE
原始数据:
# Example data: three rows, seven value columns plus the reference column
# `Recs`. Contains empty strings, NA cells and an all-NA logical column (ColG).
df <- data.frame(
  ColA = c(1L, 1L, 1L),
  ColB = c("A-1", "", NA),
  ColC = c("A-3", "B-1", "A"),
  ColD = c("B", "C R", "B"),
  ColE = c("B", "D", "A"),
  COlF = c(NA, "E", ""),
  ColG = c(NA, NA, NA),
  Recs = c("C", "B", "B"),
  stringsAsFactors = FALSE
)
这应该是另一种获得所需结果的方法:
# Check, for row `x` of the global `df`, which of the first seven columns hold
# exactly the value stored in column 8 of that row.
#
# x: a single row index into `df`.
# Returns a logical vector with one element per compared column; TRUE marks an
# exact match. NA cells and an NA reference value never match.
f.checkExist <- function(x) {
  rec <- as.character(df[[8]][x])
  cells <- vapply(df[x, 1:7], as.character, character(1), USE.NAMES = FALSE)
  # Exact comparison instead of grepl(): a regex search also reports partial
  # hits, e.g. the pattern "B" inside the cell "B-1".
  !is.na(rec) & !is.na(cells) & cells == rec
}

# One logical per row: TRUE when any compared column matched. vapply() with
# seq_len() replaces grepl(T, lapply(...)), which relied on deparsing a list to
# text and on the reassignable shorthand `T`.
df$exists <- vapply(
  seq_len(nrow(df)),
  function(i) any(f.checkExist(i)),
  logical(1)
)
我有一个数据框如下:
df1
ColA ColB ColC ColD ColE COlF ColG Recs
1 A-1 A - 3 B B NA C
1 B-1 C R D E NA B
1 NA A B A B
如何确定最后一列 Recs 中的值是否出现在其所在行的其他列中?
我在下面尝试过,但它不起作用,因为我的普通数据集中有重复项:
# Asker's attempt: labels a row "Yes" when ANY non-NA, non-empty value occurs
# more than once within the row (anyDuplicated() != 0), and "No" otherwise.
# The duplicate does not have to involve the Recs value.
df1$Exist <- apply(df1, 1, FUN = function(x)
c("No", "Yes")[(anyDuplicated(x[!is.na(x) & x != "" ])!=0) +1])
还有空格、NA 和包含空格和破折号的字符值。
最终输出应该是:
ColA ColB ColC ColD ColE COlF ColG Recs Exist?
1 A-1 A - 3 B B NA C No
1 B-1 C R D E NA B No
1 NA A B A B Yes
谢谢
# Row-wise membership test: for every row of df1, check whether the value in
# `Recs` also occurs in one of the other columns of that same row.
# The result is appended to df1 as the logical column `exist`.

# Compare against every column except Recs instead of a hard-coded 1:7 range.
other_cols <- setdiff(seq_along(df1), which(names(df1) == "Recs"))

# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(df1) would iterate over c(1, 0).
exist <- vapply(
  seq_len(nrow(df1)),
  function(i) {
    rec <- as.character(df1[["Recs"]][i])
    # Convert each cell explicitly so factor cells compare by label rather
    # than by their integer code.
    cells <- unlist(lapply(df1[i, other_cols, drop = FALSE], as.character))
    # A missing reference value is never reported as found.
    !is.na(rec) && rec %in% cells
  },
  logical(1)
)
df1 <- cbind(df1, exist)
如果我理解正确的话,这应该有效:
# Flag, for every row of df1, whether the value in `Recs` is also present in
# any other column of the same row, and store the answer as "Yes"/"No" in
# df1$Exist.

# Compute column index of reference variable
col_ind <- which(colnames(df1) == "Recs")
stopifnot(length(col_ind) == 1L)

# Reference values as character so numeric and factor columns are compared by
# their plain text value.
recs_chr <- as.character(df1[[col_ind]])

# Compute boolean vector of presence.
# The comparison is done column by column instead of apply(df1, 1, ...):
# apply() converts the data frame with as.matrix(), which format()s numeric
# columns to a common width (e.g. " 1"), so equal values could fail `==`.
present_bool <- Reduce(
  function(found, column) {
    hit <- as.character(column) == recs_chr
    # NA comparisons (missing cell or missing Recs) count as "no match".
    found | (!is.na(hit) & hit)
  },
  df1[-col_ind],
  rep(FALSE, nrow(df1))
)

# Create the desired column
df1$Exist <- ifelse(present_bool, "Yes", "No")
为了效率,你可以在这里使用data.table。
library(data.table)
# Convert df to a data.table by reference and add the logical column `Exist`.
# Each row forms its own group; within a group, unlist(.SD) flattens the cells
# of every column except Recs (.SDcols = -"Recs") into one vector, and %chin%
# tests whether the row's Recs value is among them.
# seq_len() keeps the grouping vector valid for a zero-row table, where
# 1:nrow(df) would evaluate to c(1, 0).
setDT(df)[, Exist := Recs %chin% unlist(.SD), .SDcols = -"Recs",
          by = seq_len(nrow(df))]
这给出了
ColA ColB ColC ColD ColE COlF ColG Recs Exist
1: 1 A-1 A-3 B B NA NA C FALSE
2: 1 B-1 C R D E NA B FALSE
3: 1 NA A B A NA B TRUE
原始数据:
# Example data: three rows, seven value columns plus the reference column
# `Recs`. Contains empty strings, NA cells and an all-NA logical column (ColG).
df <- data.frame(
  ColA = c(1L, 1L, 1L),
  ColB = c("A-1", "", NA),
  ColC = c("A-3", "B-1", "A"),
  ColD = c("B", "C R", "B"),
  ColE = c("B", "D", "A"),
  COlF = c(NA, "E", ""),
  ColG = c(NA, NA, NA),
  Recs = c("C", "B", "B"),
  stringsAsFactors = FALSE
)
这应该是另一种获得所需结果的方法:
# Check, for row `x` of the global `df`, which of the first seven columns hold
# exactly the value stored in column 8 of that row.
#
# x: a single row index into `df`.
# Returns a logical vector with one element per compared column; TRUE marks an
# exact match. NA cells and an NA reference value never match.
f.checkExist <- function(x) {
  rec <- as.character(df[[8]][x])
  cells <- vapply(df[x, 1:7], as.character, character(1), USE.NAMES = FALSE)
  # Exact comparison instead of grepl(): a regex search also reports partial
  # hits, e.g. the pattern "B" inside the cell "B-1".
  !is.na(rec) & !is.na(cells) & cells == rec
}

# One logical per row: TRUE when any compared column matched. vapply() with
# seq_len() replaces grepl(T, lapply(...)), which relied on deparsing a list to
# text and on the reassignable shorthand `T`.
df$exists <- vapply(
  seq_len(nrow(df)),
  function(i) any(f.checkExist(i)),
  logical(1)
)