有条件地重复字符串的某些部分
Repeating certain part of string conditionally
我想重复字符串中位于 ]
和 ;
之间的部分,重复次数等于它前面 []
内以 ;
分隔的元素个数。例如,[A1, AB11; A2, AB22] I1, C1
的期望输出是 [A1, AB11] I1, C1; [A2, AB22] I1, C1
。希望能给一些入手的提示,谢谢。
# Example input: one string per row. Each "[...]" group may hold several
# ";"-separated items that all share the text following the closing bracket.
# stringsAsFactors = FALSE keeps String as character on R < 4.0, where the
# default would make it a factor and strsplit(df1$String, ...) would fail.
df1 <- data.frame(
  String = c(
    "[A1, AB11; A2, AB22] I1, C1; [A3, AB33] I3, C1",
    "[A4, AB44] I4, C4; [A5, AB55; A6, AB66; A7, AB77] I7, C7"
  ),
  stringsAsFactors = FALSE
)
df1
String
1 [A1, AB11; A2, AB22] I1, C1; [A3, AB33] I3, C1
2 [A4, AB44] I4, C4; [A5, AB55; A6, AB66; A7, AB77] I7, C7
# Desired result for the example input: every item inside a "[...]" group gets
# its own brackets, followed by a copy of the text that trailed the group.
# stringsAsFactors = FALSE keeps String as character on R < 4.0.
# NOTE(review): the second string has ";[A6" without a space after ";";
# kept verbatim - confirm whether that spacing is intentional.
df2 <- data.frame(
  String = c(
    "[A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1",
    "[A4, AB44] I4, C4; [A5, AB55] I7, C7;[A6, AB66] I7, C7; [A7, AB77] I7, C7"
  ),
  stringsAsFactors = FALSE
)
df2
String
1 [A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1
2 [A4, AB44] I4, C4; [A5, AB55] I7, C7;[A6, AB66] I7, C7; [A7, AB77] I7, C7
不是最整洁的解决方案,但它使用了 stringr
library(stringr)
library(purrr)

# 1. Split each string at the ";" that is followed by a "[" (the lookahead
#    leaves the "[" attached to the next group).
# 2. str_match() returns a matrix per string: column 2 is the bracket
#    contents, column 3 is the text after the closing bracket.
# 3. Split the contents on ";", wrap every item in its own brackets followed
#    by the group's trailing text, and join everything back with "; ".
# Backslashes are doubled because "\[" is not a valid escape in an R string.
str_split(df1$String, ";(?= *\\[)") %>%
  map(str_match, "\\[(.+?)\\] (.+)") %>%
  map(function(m) {
    # Iterate over the list from str_split() directly; wrapping it in paste0()
    # would deparse multi-item elements into text like c("A1, AB11", ...).
    pieces <- map2(
      str_split(m[, 2], "; ?"),
      m[, 3],
      function(inside, outside) paste0("[", inside, "] ", outside)
    )
    paste(unlist(pieces), collapse = "; ")
  })
更好的解决方案:
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)

# Backslashes in the patterns are doubled because "\[" is not a valid escape
# in an R string literal.
as_tibble(df1) %>%
  # One row per "[...] text" group; the lookahead keeps "[" on the group.
  mutate(splits = str_split(String, "; *(?=\\[)")) %>%
  unnest_longer(col = splits) %>%
  # Cut each group into three pieces: "" (before "["), the bracket contents,
  # and the trailing text; then split every piece on ";" so the contents
  # become a vector of items.
  mutate(splits = map(str_split(splits, "\\[|\\] ?"), str_split, "; ?")) %>%
  # The three unnamed pieces are spread into columns referred to below as
  # ...1, ...2 and ...3.
  # NOTE(review): newer tidyr releases may require `names_sep` for unnamed
  # elements - confirm against the installed version.
  unnest_wider(splits) %>%
  # Each call collapses to one string, so map2_chr() gives a character column.
  mutate(
    val = map2_chr(...2, ...3, ~ paste0("[", .x, "] ", .y, collapse = "; "))
  ) %>%
  group_by(String) %>%
  summarise(val = paste0(val, collapse = "; "))
# A tibble: 2 x 2
String val
<fct> <chr>
1 [A1, AB11; A2, AB22] I1, C1; [A3,… [A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1
2 [A4, AB44] I4, C4; [A5, AB55; A6,… [A4, AB44] I4, C4; [A5, AB55] I7, C7; [A6, AB66] I7, C7; [A7, AB77] I7, C7
这是一个基本的 R 解决方案:
# Base R: split each string on the brackets so that even-numbered pieces are
# bracket contents and the piece after each one is that group's trailing text.
# A ";" is appended first so that every trailing text ends with a separator.
# Backslashes are doubled because "\[" is not a valid escape in an R string.
vapply(
  strsplit(paste0(df1$String, ";"), "\\[|\\]"),
  function(x) {
    for (i in seq_along(x)) {
      if (i %% 2 == 0) {
        # Make the trailing text end in exactly "; " so repeats are spaced
        # evenly, including for the last group (which ends in a bare ";").
        outside <- sub(" *$", " ", x[i + 1])
        # Replace each inner ";" (and the spaces after it) with
        # "]<trailing text>[", so no stray space follows the new "[".
        x[i] <- paste0(
          "[",
          gsub("; *", paste0("]", outside, "["), x[i]),
          "]"
        )
      }
    }
    # Drop the ";" that was appended only to terminate the last piece.
    sub("; *$", "", paste(x, collapse = ""))
  },
  character(1)
)
#> [1] "[A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1"
#> [2] "[A4, AB44] I4, C4; [A5, AB55] I7, C7; [A6, AB66] I7, C7; [A7, AB77] I7, C7"
我过去曾尝试过类似的东西,并认为使用 glue
和 unglue
包进行改编可能会很有趣。
开头的 strsplit
按分号拆分字符串,但忽略方括号内的分号。
对于拆分出的每一段,unglue
会把方括号内需要逐项重复的内容与方括号外要附加的内容分开。
library(glue)
library(unglue)
library(purrr)
# Format one bracketed item followed by the text that trails its group,
# e.g. inside = "A1, AB11", outside = "I1, C1" gives "[A1, AB11] I1, C1".
my_fun <- function(inside, outside) {
  template <- "[{inside}] {outside}"
  glue(template)
}
# Split on "; " only outside brackets: a whole "[...]" is matched and then
# thrown away with (*SKIP)(*F), so only the remaining "; " act as delimiters.
# Backslashes are doubled because "\[" and "\s" are not valid escapes in an
# R string; as.character() guards against String being a factor.
vapply(
  strsplit(
    as.character(df1$String),
    "\\[[^]]*\\](*SKIP)(*F)|;\\s",
    perl = TRUE
  ),
  function(x) {
    # One row per group: Inside = bracket contents, Outside = trailing text.
    ud <- unglue_data(x, patterns = "[{Inside}] {Outside}")
    # Split every group's contents into its individual items.
    ud_in <- map(ud[["Inside"]], strsplit, split = "; ")
    # Pair each item with the trailing text of the group it came from.
    ud_map <- map(seq_along(ud[["Inside"]]), function(y) {
      map2(unlist(ud_in[y]), ud[["Outside"]][y], my_fun)
    })
    paste(unlist(ud_map), collapse = "; ")
  },
  character(1)
)
输出
[1] "[A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1"
[2] "[A4, AB44] I4, C4; [A5, AB55] I7, C7; [A6, AB66] I7, C7; [A7, AB77] I7, C7"
我想重复字符串中位于 ]
和 ;
之间的部分,重复次数等于它前面 []
内以 ;
分隔的元素个数。例如,[A1, AB11; A2, AB22] I1, C1
的期望输出是 [A1, AB11] I1, C1; [A2, AB22] I1, C1
。希望能给一些入手的提示,谢谢。
# Example input: one string per row. Each "[...]" group may hold several
# ";"-separated items that all share the text following the closing bracket.
# stringsAsFactors = FALSE keeps String as character on R < 4.0, where the
# default would make it a factor and strsplit(df1$String, ...) would fail.
df1 <- data.frame(
  String = c(
    "[A1, AB11; A2, AB22] I1, C1; [A3, AB33] I3, C1",
    "[A4, AB44] I4, C4; [A5, AB55; A6, AB66; A7, AB77] I7, C7"
  ),
  stringsAsFactors = FALSE
)
df1
String
1 [A1, AB11; A2, AB22] I1, C1; [A3, AB33] I3, C1
2 [A4, AB44] I4, C4; [A5, AB55; A6, AB66; A7, AB77] I7, C7
# Desired result for the example input: every item inside a "[...]" group gets
# its own brackets, followed by a copy of the text that trailed the group.
# stringsAsFactors = FALSE keeps String as character on R < 4.0.
# NOTE(review): the second string has ";[A6" without a space after ";";
# kept verbatim - confirm whether that spacing is intentional.
df2 <- data.frame(
  String = c(
    "[A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1",
    "[A4, AB44] I4, C4; [A5, AB55] I7, C7;[A6, AB66] I7, C7; [A7, AB77] I7, C7"
  ),
  stringsAsFactors = FALSE
)
df2
String
1 [A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1
2 [A4, AB44] I4, C4; [A5, AB55] I7, C7;[A6, AB66] I7, C7; [A7, AB77] I7, C7
不是最整洁的解决方案,但它使用了 stringr
library(stringr)
library(purrr)

# 1. Split each string at the ";" that is followed by a "[" (the lookahead
#    leaves the "[" attached to the next group).
# 2. str_match() returns a matrix per string: column 2 is the bracket
#    contents, column 3 is the text after the closing bracket.
# 3. Split the contents on ";", wrap every item in its own brackets followed
#    by the group's trailing text, and join everything back with "; ".
# Backslashes are doubled because "\[" is not a valid escape in an R string.
str_split(df1$String, ";(?= *\\[)") %>%
  map(str_match, "\\[(.+?)\\] (.+)") %>%
  map(function(m) {
    # Iterate over the list from str_split() directly; wrapping it in paste0()
    # would deparse multi-item elements into text like c("A1, AB11", ...).
    pieces <- map2(
      str_split(m[, 2], "; ?"),
      m[, 3],
      function(inside, outside) paste0("[", inside, "] ", outside)
    )
    paste(unlist(pieces), collapse = "; ")
  })
更好的解决方案:
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)

# Backslashes in the patterns are doubled because "\[" is not a valid escape
# in an R string literal.
as_tibble(df1) %>%
  # One row per "[...] text" group; the lookahead keeps "[" on the group.
  mutate(splits = str_split(String, "; *(?=\\[)")) %>%
  unnest_longer(col = splits) %>%
  # Cut each group into three pieces: "" (before "["), the bracket contents,
  # and the trailing text; then split every piece on ";" so the contents
  # become a vector of items.
  mutate(splits = map(str_split(splits, "\\[|\\] ?"), str_split, "; ?")) %>%
  # The three unnamed pieces are spread into columns referred to below as
  # ...1, ...2 and ...3.
  # NOTE(review): newer tidyr releases may require `names_sep` for unnamed
  # elements - confirm against the installed version.
  unnest_wider(splits) %>%
  # Each call collapses to one string, so map2_chr() gives a character column.
  mutate(
    val = map2_chr(...2, ...3, ~ paste0("[", .x, "] ", .y, collapse = "; "))
  ) %>%
  group_by(String) %>%
  summarise(val = paste0(val, collapse = "; "))
# A tibble: 2 x 2
String val
<fct> <chr>
1 [A1, AB11; A2, AB22] I1, C1; [A3,… [A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1
2 [A4, AB44] I4, C4; [A5, AB55; A6,… [A4, AB44] I4, C4; [A5, AB55] I7, C7; [A6, AB66] I7, C7; [A7, AB77] I7, C7
这是一个基本的 R 解决方案:
# Base R: split each string on the brackets so that even-numbered pieces are
# bracket contents and the piece after each one is that group's trailing text.
# A ";" is appended first so that every trailing text ends with a separator.
# Backslashes are doubled because "\[" is not a valid escape in an R string.
vapply(
  strsplit(paste0(df1$String, ";"), "\\[|\\]"),
  function(x) {
    for (i in seq_along(x)) {
      if (i %% 2 == 0) {
        # Make the trailing text end in exactly "; " so repeats are spaced
        # evenly, including for the last group (which ends in a bare ";").
        outside <- sub(" *$", " ", x[i + 1])
        # Replace each inner ";" (and the spaces after it) with
        # "]<trailing text>[", so no stray space follows the new "[".
        x[i] <- paste0(
          "[",
          gsub("; *", paste0("]", outside, "["), x[i]),
          "]"
        )
      }
    }
    # Drop the ";" that was appended only to terminate the last piece.
    sub("; *$", "", paste(x, collapse = ""))
  },
  character(1)
)
#> [1] "[A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1"
#> [2] "[A4, AB44] I4, C4; [A5, AB55] I7, C7; [A6, AB66] I7, C7; [A7, AB77] I7, C7"
我过去曾尝试过类似的东西,并认为使用 glue
和 unglue
包进行改编可能会很有趣。
开头的 strsplit
按分号拆分字符串,但忽略方括号内的分号。
对于拆分出的每一段,unglue
会把方括号内需要逐项重复的内容与方括号外要附加的内容分开。
library(glue)
library(unglue)
library(purrr)
# Format one bracketed item followed by the text that trails its group,
# e.g. inside = "A1, AB11", outside = "I1, C1" gives "[A1, AB11] I1, C1".
my_fun <- function(inside, outside) {
  template <- "[{inside}] {outside}"
  glue(template)
}
# Split on "; " only outside brackets: a whole "[...]" is matched and then
# thrown away with (*SKIP)(*F), so only the remaining "; " act as delimiters.
# Backslashes are doubled because "\[" and "\s" are not valid escapes in an
# R string; as.character() guards against String being a factor.
vapply(
  strsplit(
    as.character(df1$String),
    "\\[[^]]*\\](*SKIP)(*F)|;\\s",
    perl = TRUE
  ),
  function(x) {
    # One row per group: Inside = bracket contents, Outside = trailing text.
    ud <- unglue_data(x, patterns = "[{Inside}] {Outside}")
    # Split every group's contents into its individual items.
    ud_in <- map(ud[["Inside"]], strsplit, split = "; ")
    # Pair each item with the trailing text of the group it came from.
    ud_map <- map(seq_along(ud[["Inside"]]), function(y) {
      map2(unlist(ud_in[y]), ud[["Outside"]][y], my_fun)
    })
    paste(unlist(ud_map), collapse = "; ")
  },
  character(1)
)
输出
[1] "[A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1"
[2] "[A4, AB44] I4, C4; [A5, AB55] I7, C7; [A6, AB66] I7, C7; [A7, AB77] I7, C7"