R 警告:条件的长度 > 1,并且只会使用第一个元素(使用 outer 函数时)
R Warning: the condition has length > 1 and only the first element will be used. outer function
我有以下两个函数:
# Check whether `term1` "fits" `term2`.
# A term of at most three characters must equal the leading characters of
# `term2`; a longer term is used as a grepl() pattern searched for in `term2`.
# The `if` below expects a single TRUE/FALSE, so `term1` should be one string.
name_fitting <- function(term1, term2) {
  term1_width <- nchar(term1)
  if (term1_width > 3) {
    return(grepl(term1, term2))
  }
  leading <- substring(term2, 1, term1_width)
  leading == term1
}
# Decide whether two names are compatible; returns TRUE or FALSE.
# Punctuation is removed and each name is split on single spaces.
#   * more than one shared word   -> TRUE
#   * exactly one shared word     -> TRUE if any leftover word "fits" another
#                                    leftover word according to name_fitting()
#   * no shared word              -> FALSE
name_matching <- function(name1, name2) {
  words1 <- as.character(unlist(strsplit(gsub("[[:punct:]]+", "", name1), " ")))
  words2 <- as.character(unlist(strsplit(gsub("[[:punct:]]+", "", name2), " ")))

  n_shared <- length(intersect(words1, words2))
  if (n_shared > 1) {
    return(TRUE)
  }
  if (n_shared != 1) {
    return(FALSE)
  }

  # Words that occur in only one of the two names.
  non_matching <- union(setdiff(words1, words2), setdiff(words2, words1))
  # outer() hands whole vectors to name_fitting(), not one pair at a time.
  temp <- outer(X = non_matching, Y = non_matching, FUN = "name_fitting")
  # A word compared with itself does not count as a match.
  diag(temp) <- FALSE
  any(temp)
}
name_fitting 在 name_matching 中被调用。name_matching 检查传入的两个名称是否兼容,并返回 TRUE 或 FALSE。
当我尝试按如下方式匹配两个名称时:
# Example pair of names used to reproduce the warning.
name1 <- "MARCO BRAMBILLA"
name2 <- "M BRAMBILLA BRANDUARDI"
我收到以下警告:
the condition has length > 1 and only the first element will be used
这表明 outer 函数没有将数据正确地传递给 name_fitting。
我该如何解决?
您的函数把字符向量 non_matching 传给了 name_fitting,该向量包含三个元素:[1] "MARCO" "M" "BRANDUARDI"。这个向量随后进入 if 语句 if (nchar(term1) <= 3)。问题在于 nchar(term1) <= 3 得到的是一个长度为 3 的逻辑向量:[1] FALSE TRUE FALSE,而 if 只接受长度为 1 的条件。
当然,关键在于您想在这里实现什么。如果您想判断 term1 是否包含三个或更少的元素,请将 nchar 替换为 length。如果您想判断 non_matching 中是否有任一元素的长度不超过 3 个字符,请把 nchar() 的比较放进 any() 中。如果您只想检查 non_matching 的第一个元素,请传入 term1[1] 而不是 term1。
解决方案是用 Vectorize() 将函数向量化:
# Check whether `term1` "fits" `term2`.
# A term of at most three characters must equal the leading characters of
# `term2`; a longer term is used as a grepl() pattern searched for in `term2`.
name_fitting <- function(term1, term2) {
  prefix_len <- nchar(term1)
  if (prefix_len > 3) {
    return(grepl(term1, term2))
  }
  substring(term2, 1, prefix_len) == term1
}
# Vectorize() applies the scalar rule pair by pair, so the `if` above always
# sees a length-one condition even when outer() passes whole vectors.
name_fitting <- Vectorize(name_fitting)
# Decide whether two names are compatible.
#
# Arguments:
#   name1, name2  character names; after Vectorize() they may be vectors and
#                 are compared pair by pair.
# Value:
#   TRUE or FALSE for each pair (wrapped by Vectorize(), so the result may
#   carry names).
#
# Rules, applied to the punctuation-free, space-separated words of each name:
#   * more than one shared word -> TRUE
#   * exactly one shared word   -> TRUE if any leftover word "fits" another
#                                  leftover word according to name_fitting()
#   * no shared word            -> FALSE
name_matching <- function(name1, name2) {
  # Strip punctuation, trim, split on spaces and drop the empty tokens that
  # repeated spaces leave behind. Doing this once keeps both shared-word
  # tests consistent: previously the "== 1" test counted "" as a shared word.
  tokenize <- function(name) {
    cleaned <- trimws(gsub("[[:punct:]]+", "", name))
    tokens <- as.character(unlist(strsplit(cleaned, " ")))
    tokens[tokens != ""]
  }
  tokens1 <- tokenize(name1)
  tokens2 <- tokenize(name2)

  shared <- intersect(tokens1, tokens2)
  if (length(shared) > 1) {
    return(TRUE)
  }
  if (length(shared) != 1) {
    return(FALSE)
  }

  # Words that occur in only one of the two names.
  non_matching <- union(setdiff(tokens1, tokens2), setdiff(tokens2, tokens1))
  # With fewer than two leftover words there is no pair to compare.
  if (length(non_matching) < 2) {
    return(FALSE)
  }
  temp <- outer(X = non_matching, Y = non_matching, FUN = "name_fitting")
  # A word compared with itself does not count as a match.
  diag(temp) <- FALSE
  any(temp)
}
name_matching <- Vectorize(name_matching)
我有以下两个函数:
# Test whether `term1` "fits" `term2`.
# Short terms (three characters or fewer) must match the start of `term2`;
# longer terms are passed to grepl() as a pattern to find inside `term2`.
# The `if` condition must be a single value, so `term1` should be one string.
name_fitting <- function(term1, term2) {
  is_short <- nchar(term1) <= 3
  if (is_short) {
    head_of_term2 <- substring(term2, 1, nchar(term1))
    head_of_term2 == term1
  } else {
    grepl(term1, term2)
  }
}
# Report whether two names are compatible (TRUE) or not (FALSE).
# Both names have punctuation removed and are split on single spaces; the
# answer depends on how many words the two names share.
name_matching <- function(name1, name2) {
  split_words <- function(name) {
    as.character(unlist(strsplit(gsub("[[:punct:]]+", "", name), " ")))
  }
  left <- split_words(name1)
  right <- split_words(name2)
  common_count <- length(intersect(left, right))

  # Two or more shared words: compatible.
  if (common_count > 1) {
    return(TRUE)
  }
  # No shared word: not compatible.
  if (common_count != 1) {
    return(FALSE)
  }

  # Exactly one shared word: compare the words unique to either name.
  non_matching <- union(setdiff(left, right), setdiff(right, left))
  # outer() passes complete vectors to name_fitting() in a single call.
  temp <- outer(X = non_matching, Y = non_matching, FUN = "name_fitting")
  diag(temp) <- FALSE  # ignore each word's comparison with itself
  any(temp)
}
name_fitting 在 name_matching 中被调用。name_matching 检查传入的两个名称是否兼容,并返回 TRUE 或 FALSE。
当我尝试按如下方式匹配两个名称时:
# Example pair of names used to reproduce the warning.
name1 <- "MARCO BRAMBILLA"
name2 <- "M BRAMBILLA BRANDUARDI"
我收到以下警告:
the condition has length > 1 and only the first element will be used
这表明 outer 函数没有将数据正确地传递给 name_fitting。
我该如何解决?
您的函数把字符向量 non_matching 传给了 name_fitting,该向量包含三个元素:[1] "MARCO" "M" "BRANDUARDI"。这个向量随后进入 if 语句 if (nchar(term1) <= 3)。问题在于 nchar(term1) <= 3 得到的是一个长度为 3 的逻辑向量:[1] FALSE TRUE FALSE,而 if 只接受长度为 1 的条件。
当然,关键在于您想在这里实现什么。如果您想判断 term1 是否包含三个或更少的元素,请将 nchar 替换为 length。如果您想判断 non_matching 中是否有任一元素的长度不超过 3 个字符,请把 nchar() 的比较放进 any() 中。如果您只想检查 non_matching 的第一个元素,请传入 term1[1] 而不是 term1。
解决方案是用 Vectorize() 将函数向量化:
# Test whether `term1` "fits" `term2`.
# Short terms (three characters or fewer) must match the start of `term2`;
# longer terms are passed to grepl() as a pattern to find inside `term2`.
name_fitting <- function(term1, term2) {
  is_short <- nchar(term1) <= 3
  if (is_short) {
    head_of_term2 <- substring(term2, 1, nchar(term1))
    head_of_term2 == term1
  } else {
    grepl(term1, term2)
  }
}
# Wrap the scalar rule so it is applied to each (term1, term2) pair in turn;
# the `if` then never receives a condition longer than one element.
name_fitting <- Vectorize(name_fitting)
# Decide whether two names are compatible.
#
# Arguments:
#   name1, name2  character names; after Vectorize() they may be vectors and
#                 are compared pair by pair.
# Value:
#   TRUE or FALSE for each pair (wrapped by Vectorize(), so the result may
#   carry names).
#
# Rules, applied to the punctuation-free, space-separated words of each name:
#   * more than one shared word -> TRUE
#   * exactly one shared word   -> TRUE if any leftover word "fits" another
#                                  leftover word according to name_fitting()
#   * no shared word            -> FALSE
name_matching <- function(name1, name2) {
  # Strip punctuation, trim, split on spaces and drop the empty tokens that
  # repeated spaces leave behind. Doing this once keeps both shared-word
  # tests consistent: previously the "== 1" test counted "" as a shared word.
  tokenize <- function(name) {
    cleaned <- trimws(gsub("[[:punct:]]+", "", name))
    tokens <- as.character(unlist(strsplit(cleaned, " ")))
    tokens[tokens != ""]
  }
  tokens1 <- tokenize(name1)
  tokens2 <- tokenize(name2)

  shared <- intersect(tokens1, tokens2)
  if (length(shared) > 1) {
    return(TRUE)
  }
  if (length(shared) != 1) {
    return(FALSE)
  }

  # Words that occur in only one of the two names.
  non_matching <- union(setdiff(tokens1, tokens2), setdiff(tokens2, tokens1))
  # With fewer than two leftover words there is no pair to compare.
  if (length(non_matching) < 2) {
    return(FALSE)
  }
  temp <- outer(X = non_matching, Y = non_matching, FUN = "name_fitting")
  # A word compared with itself does not count as a match.
  diag(temp) <- FALSE
  any(temp)
}
name_matching <- Vectorize(name_matching)