跟进:消除 R 中 2 个向量的部分重叠部分
Followup: Eliminating partially overlapping parts of 2 vectors in R
我正在跟进之前的一个回答。我想知道是否可以删除字符向量 n1 的各元素中,与公式 f1 中的元素部分重叠的那一部分。
例如,在n1
中,我们看到"timePost-test 1"
&"timePost-test 2"
与f1
中的time
部分重叠。
因此在desired_output
中,我们要删除其中的"time"
部分。因为 n1
中的其他元素与 f1
中的元素完全重叠(例如 time_wk_whn
),我们保持它们不变。
我尝试了以下解决方案,但无法获得我的 desired output
(下)。
是否可以在 BASE R 或 tidyverse 中获取我的 desired_output?
# Strip a leading formula-variable name from those elements of `vec` that are
# not themselves exact variable names of `fmla`.
#
# fmla: a formula; its variable names are collected with all.vars().
# vec:  a character vector of names to clean up.
#
# Returns `vec` where, for each non-exact element, only the first match
# anchored at the start of the string is removed. Later pieces such as
# "pub_type" are therefore left in place.
foo <- function(fmla, vec) {
  vars <- all.vars(fmla)
  # Elements that are not an exact variable name are the ones to rewrite.
  partial <- setdiff(vec, vars)
  # One start-anchored alternative per variable name.
  anchored <- paste(paste0('^', vars), collapse = "|")
  vec[vec %in% partial] <- sub(anchored, "", partial)
  vec
}
# First example: `time` enters the formula as a plain variable.
f1 <- gi ~ 0 + time:pub_type + time_wk_whn + time_wk_btw
n1 <- c(
  "time_wk_whn",
  "time_wk_btw",
  "timePost-test 1:pub_typejournal",
  "timePost-test 2:pub_typejournal"
)

# Example of use:
foo(f1, n1)
# [1] "time_wk_whn"                 "time_wk_btw"
# [3] "Post-test 1:pub_typejournal" "Post-test 2:pub_typejournal"

desired_output <- c(
  "time_wk_whn",
  "time_wk_btw",
  "Post-test 1:journal",
  "Post-test 2:journal"
)

# Second example: `f1` and `n1` can also contain a function call such as
# `factor(time)` (below); the desired output should remain the same.
f1 <- gi ~ 0 + factor(time):pub_type + time_wk_whn + time_wk_btw
n1 <- c(
  "time_wk_whn",
  "time_wk_btw",
  "factor(time)Post-test 1:pub_typejournal",
  "factor(time)Post-test 2:pub_typejournal"
)
当您附加 ^ 时,您是在声明匹配必须位于字符串的开头。此外,sub() 只会替换第一个匹配项,您需要改用 gsub()。
# Remove every occurrence of a formula variable name from the elements of
# `vec` that are not themselves exact variable names of `fmla`.
#
# fmla: a formula; its variable names are collected with all.vars().
# vec:  a character vector of names to clean up.
#
# Returns a character vector the same length as `vec`. Elements equal to a
# variable name are kept unchanged; in all others each variable name is
# deleted wherever it occurs (the pattern is not anchored).
foo <- function(fmla, vec) {
  vars <- all.vars(fmla)

  # Escape regex metacharacters so that a name such as `a.b` is matched
  # literally instead of `.` matching any character.
  escaped <- gsub("([\\\\^$.?*|+()\\[\\]{}])", "\\\\\\1", vars, perl = TRUE)
  pattern <- paste(escaped, collapse = "|")

  # Rewrite position by position. Building the replacement from
  # setdiff(vec, vars) would drop duplicates and misalign the assignment
  # when `vec` contains the same partial name more than once.
  needs_cleanup <- !(vec %in% vars)
  vec[needs_cleanup] <- gsub(pattern, "", vec[needs_cleanup])
  vec
}
# Example inputs; `fmla` and `vec` are bound to the same values as `f1`
# and `n1`.
fmla <- f1 <- gi ~ 0 + time:pub_type + time_wk_whn + time_wk_btw
vec <- n1 <- c(
  "time_wk_whn",
  "time_wk_btw",
  "timePost-test 1:pub_typejournal",
  "timePost-test 2:pub_typejournal"
)
desired_output <- c(
  "time_wk_whn",
  "time_wk_btw",
  "Post-test 1:journal",
  "Post-test 2:journal"
)

x <- foo(f1, n1)
# [1] "time_wk_whn"         "time_wk_btw"         "Post-test 1:journal"
# [4] "Post-test 2:journal"

# Compare the result with the target vector.
all.equal(desired_output, x)
# [1] TRUE
您的想法可行,但只需要多做一些预处理。考虑
# Remove every occurrence of a formula term variable (e.g. `time` or
# `factor(time)`) from the elements of `vec` that are not themselves exact
# term variables of `fmla`.
#
# fmla: a formula, passed to terms().
# vec:  a character vector of names to clean up.
#
# Returns a character vector the same length as `vec`. Elements equal to a
# term variable are kept unchanged; in all others each term variable is
# deleted wherever it occurs (the pattern is not anchored).
foo <- function(fmla, vec) {
  # Take the variables from terms() rather than all.vars(): this yields
  # `factor(time)` as a whole instead of just `time`. The first element of
  # the deparsed call is the function name, hence [-1L].
  term_vars <- as.character(attr(terms(fmla), "variables"))[-1L]

  # Escape all regex metacharacters so that names containing `(`, `)`, `.`
  # and the like are matched literally.
  escaped <- gsub("([\\\\^$.?*|+()\\[\\]{}])", "\\\\\\1", term_vars, perl = TRUE)
  pattern <- paste(escaped, collapse = "|")

  # Rewrite position by position. Building the replacement from
  # setdiff(vec, term_vars) would drop duplicates and misalign the
  # assignment when `vec` contains the same partial name more than once.
  needs_cleanup <- !(vec %in% term_vars)
  vec[needs_cleanup] <- gsub(pattern, "", vec[needs_cleanup])
  vec
}
输出
> foo(
+ gi ~ 0 + factor(time):pub_type + time_wk_whn + time_wk_btw,
+ c("time_wk_whn","time_wk_btw", "factor(time)Post-test 1:pub_typejournal", "factor(time)Post-test 2:pub_typejournal")
+ )
[1] "time_wk_whn" "time_wk_btw" "Post-test 1:journal" "Post-test 2:journal"
> foo(
+ gi ~ 0 + time:pub_type + time_wk_whn + time_wk_btw,
+ c("time_wk_whn","time_wk_btw", "timePost-test 1:pub_typejournal", "timePost-test 2:pub_typejournal")
+ )
[1] "time_wk_whn" "time_wk_btw" "Post-test 1:journal" "Post-test 2:journal"
我正在跟进之前的一个回答。我想知道是否可以删除字符向量 n1 的各元素中,与公式 f1 中的元素部分重叠的那一部分。
例如,在n1
中,我们看到"timePost-test 1"
&"timePost-test 2"
与f1
中的time
部分重叠。
因此在desired_output
中,我们要删除其中的"time"
部分。因为 n1
中的其他元素与 f1
中的元素完全重叠(例如 time_wk_whn
),我们保持它们不变。
我尝试了以下解决方案,但无法获得我的 desired output
(下)。
是否可以在 BASE R 或 tidyverse 中获取我的 desired_output?
# Strip a leading formula-variable name from those elements of `vec` that are
# not themselves exact variable names of `fmla`.
#
# fmla: a formula; its variable names are collected with all.vars().
# vec:  a character vector of names to clean up.
#
# Returns `vec` where, for each non-exact element, only the first match
# anchored at the start of the string is removed. Later pieces such as
# "pub_type" are therefore left in place.
foo <- function(fmla, vec) {
  formula_vars <- all.vars(fmla)
  # Elements that are not an exact variable name are the ones to rewrite.
  candidates <- setdiff(vec, formula_vars)
  # One start-anchored alternative per variable name.
  start_pattern <- paste(paste0('^', formula_vars), collapse = "|")
  stripped <- sub(start_pattern, "", candidates)
  vec[vec %in% candidates] <- stripped
  vec
}
# First example: `time` enters the formula as a plain variable.
f1 <- gi ~ 0 + time:pub_type + time_wk_whn + time_wk_btw
n1 <- c(
  "time_wk_whn",
  "time_wk_btw",
  "timePost-test 1:pub_typejournal",
  "timePost-test 2:pub_typejournal"
)

# Example of use:
foo(f1, n1)
# [1] "time_wk_whn"                 "time_wk_btw"
# [3] "Post-test 1:pub_typejournal" "Post-test 2:pub_typejournal"

desired_output <- c(
  "time_wk_whn",
  "time_wk_btw",
  "Post-test 1:journal",
  "Post-test 2:journal"
)

# Second example: `f1` and `n1` can also contain a function call such as
# `factor(time)` (below); the desired output should remain the same.
f1 <- gi ~ 0 + factor(time):pub_type + time_wk_whn + time_wk_btw
n1 <- c(
  "time_wk_whn",
  "time_wk_btw",
  "factor(time)Post-test 1:pub_typejournal",
  "factor(time)Post-test 2:pub_typejournal"
)
当您附加 ^ 时,您是在声明匹配必须位于字符串的开头。此外,sub() 只会替换第一个匹配项,您需要改用 gsub()。
# Remove every occurrence of a formula variable name from the elements of
# `vec` that are not themselves exact variable names of `fmla`.
#
# fmla: a formula; its variable names are collected with all.vars().
# vec:  a character vector of names to clean up.
#
# Returns a character vector the same length as `vec`. Elements equal to a
# variable name are kept unchanged; in all others each variable name is
# deleted wherever it occurs (the pattern is not anchored).
foo <- function(fmla, vec) {
  vars <- all.vars(fmla)

  # Escape regex metacharacters so that a name such as `a.b` is matched
  # literally instead of `.` matching any character.
  escaped <- gsub("([\\\\^$.?*|+()\\[\\]{}])", "\\\\\\1", vars, perl = TRUE)
  pattern <- paste(escaped, collapse = "|")

  # Rewrite position by position. Building the replacement from
  # setdiff(vec, vars) would drop duplicates and misalign the assignment
  # when `vec` contains the same partial name more than once.
  needs_cleanup <- !(vec %in% vars)
  vec[needs_cleanup] <- gsub(pattern, "", vec[needs_cleanup])
  vec
}
# Example inputs; `fmla` and `vec` are bound to the same values as `f1`
# and `n1`.
fmla <- f1 <- gi ~ 0 + time:pub_type + time_wk_whn + time_wk_btw
vec <- n1 <- c(
  "time_wk_whn",
  "time_wk_btw",
  "timePost-test 1:pub_typejournal",
  "timePost-test 2:pub_typejournal"
)
desired_output <- c(
  "time_wk_whn",
  "time_wk_btw",
  "Post-test 1:journal",
  "Post-test 2:journal"
)

x <- foo(f1, n1)
# [1] "time_wk_whn"         "time_wk_btw"         "Post-test 1:journal"
# [4] "Post-test 2:journal"

# Compare the result with the target vector.
all.equal(desired_output, x)
# [1] TRUE
您的想法可行,但只需要多做一些预处理。考虑
# Remove every occurrence of a formula term variable (e.g. `time` or
# `factor(time)`) from the elements of `vec` that are not themselves exact
# term variables of `fmla`.
#
# fmla: a formula, passed to terms().
# vec:  a character vector of names to clean up.
#
# Returns a character vector the same length as `vec`. Elements equal to a
# term variable are kept unchanged; in all others each term variable is
# deleted wherever it occurs (the pattern is not anchored).
foo <- function(fmla, vec) {
  # Take the variables from terms() rather than all.vars(): this yields
  # `factor(time)` as a whole instead of just `time`. The first element of
  # the deparsed call is the function name, hence [-1L].
  term_vars <- as.character(attr(terms(fmla), "variables"))[-1L]

  # Escape all regex metacharacters so that names containing `(`, `)`, `.`
  # and the like are matched literally.
  escaped <- gsub("([\\\\^$.?*|+()\\[\\]{}])", "\\\\\\1", term_vars, perl = TRUE)
  pattern <- paste(escaped, collapse = "|")

  # Rewrite position by position. Building the replacement from
  # setdiff(vec, term_vars) would drop duplicates and misalign the
  # assignment when `vec` contains the same partial name more than once.
  needs_cleanup <- !(vec %in% term_vars)
  vec[needs_cleanup] <- gsub(pattern, "", vec[needs_cleanup])
  vec
}
输出
> foo(
+ gi ~ 0 + factor(time):pub_type + time_wk_whn + time_wk_btw,
+ c("time_wk_whn","time_wk_btw", "factor(time)Post-test 1:pub_typejournal", "factor(time)Post-test 2:pub_typejournal")
+ )
[1] "time_wk_whn" "time_wk_btw" "Post-test 1:journal" "Post-test 2:journal"
> foo(
+ gi ~ 0 + time:pub_type + time_wk_whn + time_wk_btw,
+ c("time_wk_whn","time_wk_btw", "timePost-test 1:pub_typejournal", "timePost-test 2:pub_typejournal")
+ )
[1] "time_wk_whn" "time_wk_btw" "Post-test 1:journal" "Post-test 2:journal"