在R中以分层方式在指定字符之前插入一个字符
Inserting a character before specified character in hierarchical manner in R
test.dat <- c("abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX")
test.want <- c("abcde", "abc1Xe", "abcd1Y", "abc1XY", "abYc1XY", "abcY1X")
假设我想在“X”或“Y”之前添加“1”,如果“X”和“Y”都存在,仅在“X”之前添加。
library(tidyverse)
case_when(
str_detect(test.dat, "X") ~ str_replace(test.dat, "X", "1X"),
str_detect(test.dat, "Y") ~ str_replace(test.dat, "Y", "1Y"),
TRUE ~ as.character(test.dat)
)
这行得通,但是有没有更简洁的方法?也许只用一次 str_replace 调用就能做到?
如果是“X”或“Y”,第二种情况如何以先出现的为准?
test.dat <- c("abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX")
test.want <- c("abcde", "abc1Xe", "abcd1Y", "abc1XY", "ab1YcXY", "abc1YX")
stringr 更可取,但我欢迎任何其他方法。谢谢。
您可以使用前瞻 (?=X) 来定位 X 之前的位置,使用 (?=Y) 来定位 Y 之前的位置,并用 ifelse 和 grepl 判断字符串中是否含有 X。
test.dat <- c(
  "abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX", "YXXdY"
)
# Strings containing an X get "1" inserted before their first X; all other
# strings get "1" before their first Y (no X and no Y: returned unchanged).
ifelse(
  grepl("X", test.dat),
  sub("(?=X)", "1", test.dat, perl = TRUE),
  sub("(?=Y)", "1", test.dat, perl = TRUE)
)
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X" "Y1XXdY"
或
sub("(?=X)|(?=Y(?!.*X))", "1", test.dat, perl=TRUE)
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X" "Y1XXdY"
其中 (?=X) 匹配 X 之前的位置,而 (?=Y(?!.*X)) 匹配这样一个 Y 之前的位置:该 Y 之后的任何位置都没有 X。
如果不仅要使用第一个命中:
# Same selection rule as the sub() version, but gsub() inserts "1" before
# every X (or, when the string has no X, before every Y).
ifelse(
  grepl("X", test.dat),
  gsub("(?=X)", "1", test.dat, perl = TRUE),
  gsub("(?=Y)", "1", test.dat, perl = TRUE)
)
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X" "Y1X1XdY"
或
# The replacement "\\11" is backreference \1 (the text captured in front of Y;
# empty when the X alternative matched) followed by a literal "1". The
# backslash must be doubled: a bare "\11" in an R string is an octal escape
# (TAB), not a backreference.
gsub("(?=X)|(^[^X]*)(?=Y(?!.*X))", "\\11", test.dat, perl=TRUE)
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X" "Y1X1XdY"
若要在 X 或 Y 中先出现的那个之前插入:
sub("(?=X)|(?=Y)", "1", test.dat, perl=TRUE)
#sub("(?=X|Y)", "1", test.dat, perl=TRUE) #Alternative
#sub("(?=[XY])", "1", test.dat, perl=TRUE) #Alternative
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "ab1YcXY" "abc1YX" "1YXXdY"
你可以使用
test.dat <- c("abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX")
# Replacement is \1 \4 "1" \2 \5 \3, with every backslash doubled because it
# sits inside an R string literal (a bare "\1" would be a control character).
# Only one alternative can match, so the other branch's groups are empty:
#   Y-only branch: \1 "1" Y \3      X branch: \4 "1" X
# \2 is placed before \3 so text following a lone Y stays after it
# ("abYcd" -> "ab1Ycd").
sub("^([^XY]*)(Y)([^X]*)$|(.*)(X)", "\\1\\41\\2\\5\\3", test.dat)
# => [1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X"
# stringr variant of the same replacement: backreferences are written with
# doubled backslashes, and \2 precedes \3 so text after a lone Y keeps its place.
stringr::str_replace(test.dat, "^([^XY]*)(Y)([^X]*)$|(.*)(X)", "\\1\\41\\2\\5\\3")
# => [1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X"
参见regex demo。
这里,
^([^XY]*)(Y)([^X]*)$
- 字符串开头 (^
),第 1 组:除 X
和 Y
(([^XY]*)
) 之外的任何零个或多个字符, 第 2 组:Y
((Y)
),第 3 组:除 X
(([^X]*)
) 之外的任何零个或多个字符,字符串结尾 ($
)
|
- 或
(.*)
- 第 4 组:尽可能多的任意零个或多个字符
(X)
- 第 5 组:字符 X。
如果您需要在没有 X
或 Y
的字符串末尾加 1:
test.dat <- c("abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX")
# Inner sub(): insert "1" before X, or before Y when the string has no X.
# Outer sub(): strings containing neither X nor Y get "1" appended
# ("\\11" = group 1 followed by a literal "1").
# Backslashes are doubled because the replacements are R string literals.
sub("^([^XY]*)$", "\\11", sub("^([^XY]*)(Y)([^X]*)$|(.*)(X)", "\\1\\41\\2\\5\\3", test.dat))
library(stringr)
# stringr variant: the inner call inserts "1" before X (or a lone Y), the outer
# call appends "1" to strings with neither. Backreferences need doubled
# backslashes inside R string literals.
str_replace(str_replace(test.dat, "^([^XY]*)(Y)([^X]*)$|(.*)(X)", "\\1\\41\\2\\5\\3"), "^([^XY]*)$", "\\11")
参见 this R demo。输出:
[1] "abcde1" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X"
test.dat <- c("abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX")
test.want <- c("abcde", "abc1Xe", "abcd1Y", "abc1XY", "abYc1XY", "abcY1X")
假设我想在“X”或“Y”之前添加“1”,如果“X”和“Y”都存在,仅在“X”之前添加。
library(tidyverse)
case_when(
str_detect(test.dat, "X") ~ str_replace(test.dat, "X", "1X"),
str_detect(test.dat, "Y") ~ str_replace(test.dat, "Y", "1Y"),
TRUE ~ as.character(test.dat)
)
这行得通,但是有没有更简洁的方法?也许只用一次 str_replace 调用就能做到?
如果是“X”或“Y”,第二种情况如何以先出现的为准?
test.dat <- c("abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX")
test.want <- c("abcde", "abc1Xe", "abcd1Y", "abc1XY", "ab1YcXY", "abc1YX")
stringr 更可取,但我欢迎任何其他方法。谢谢。
您可以使用前瞻 (?=X) 来定位 X 之前的位置,使用 (?=Y) 来定位 Y 之前的位置,并用 ifelse 和 grepl 判断字符串中是否含有 X。
test.dat <- c(
  "abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX", "YXXdY"
)
# Strings containing an X get "1" inserted before their first X; all other
# strings get "1" before their first Y (no X and no Y: returned unchanged).
ifelse(
  grepl("X", test.dat),
  sub("(?=X)", "1", test.dat, perl = TRUE),
  sub("(?=Y)", "1", test.dat, perl = TRUE)
)
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X" "Y1XXdY"
或
sub("(?=X)|(?=Y(?!.*X))", "1", test.dat, perl=TRUE)
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X" "Y1XXdY"
其中 (?=X) 匹配 X 之前的位置,而 (?=Y(?!.*X)) 匹配这样一个 Y 之前的位置:该 Y 之后的任何位置都没有 X。
如果不仅要使用第一个命中:
# Same selection rule as the sub() version, but gsub() inserts "1" before
# every X (or, when the string has no X, before every Y).
ifelse(
  grepl("X", test.dat),
  gsub("(?=X)", "1", test.dat, perl = TRUE),
  gsub("(?=Y)", "1", test.dat, perl = TRUE)
)
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X" "Y1X1XdY"
或
# The replacement "\\11" is backreference \1 (the text captured in front of Y;
# empty when the X alternative matched) followed by a literal "1". The
# backslash must be doubled: a bare "\11" in an R string is an octal escape
# (TAB), not a backreference.
gsub("(?=X)|(^[^X]*)(?=Y(?!.*X))", "\\11", test.dat, perl=TRUE)
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X" "Y1X1XdY"
若要在 X 或 Y 中先出现的那个之前插入:
sub("(?=X)|(?=Y)", "1", test.dat, perl=TRUE)
#sub("(?=X|Y)", "1", test.dat, perl=TRUE) #Alternative
#sub("(?=[XY])", "1", test.dat, perl=TRUE) #Alternative
#[1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "ab1YcXY" "abc1YX" "1YXXdY"
你可以使用
test.dat <- c("abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX")
# Replacement is \1 \4 "1" \2 \5 \3, with every backslash doubled because it
# sits inside an R string literal (a bare "\1" would be a control character).
# Only one alternative can match, so the other branch's groups are empty:
#   Y-only branch: \1 "1" Y \3      X branch: \4 "1" X
# \2 is placed before \3 so text following a lone Y stays after it
# ("abYcd" -> "ab1Ycd").
sub("^([^XY]*)(Y)([^X]*)$|(.*)(X)", "\\1\\41\\2\\5\\3", test.dat)
# => [1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X"
# stringr variant of the same replacement: backreferences are written with
# doubled backslashes, and \2 precedes \3 so text after a lone Y keeps its place.
stringr::str_replace(test.dat, "^([^XY]*)(Y)([^X]*)$|(.*)(X)", "\\1\\41\\2\\5\\3")
# => [1] "abcde" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X"
参见regex demo。
这里,
^([^XY]*)(Y)([^X]*)$
- 字符串开头 (^
),第 1 组:除X
和Y
(([^XY]*)
) 之外的任何零个或多个字符, 第 2 组:Y
((Y)
),第 3 组:除X
(([^X]*)
) 之外的任何零个或多个字符,字符串结尾 ($
)
|
- 或
(.*)
- 第 4 组:尽可能多的任意零个或多个字符
(X)
- 第 5 组:字符 X。
如果您需要在没有 X
或 Y
的字符串末尾加 1:
test.dat <- c("abcde", "abcXe", "abcdY", "abcXY", "abYcXY", "abcYX")
# Inner sub(): insert "1" before X, or before Y when the string has no X.
# Outer sub(): strings containing neither X nor Y get "1" appended
# ("\\11" = group 1 followed by a literal "1").
# Backslashes are doubled because the replacements are R string literals.
sub("^([^XY]*)$", "\\11", sub("^([^XY]*)(Y)([^X]*)$|(.*)(X)", "\\1\\41\\2\\5\\3", test.dat))
library(stringr)
# stringr variant: the inner call inserts "1" before X (or a lone Y), the outer
# call appends "1" to strings with neither. Backreferences need doubled
# backslashes inside R string literals.
str_replace(str_replace(test.dat, "^([^XY]*)(Y)([^X]*)$|(.*)(X)", "\\1\\41\\2\\5\\3"), "^([^XY]*)$", "\\11")
参见 this R demo。输出:
[1] "abcde1" "abc1Xe" "abcd1Y" "abc1XY" "abYc1XY" "abcY1X"