替换引用命名向量的整个字符串
Replace whole string referencing a named vector
我想用命名向量做部分字符串匹配:只要字符串中包含向量的某个名称,就用该名称对应的值替换整个字符串。
感谢您的帮助。
# Packages used by the question's snippets.
library(tidyr)
library(stringr)

# Lookup table: `str` is the keyword to search for, `cat` is the category
# that keyword maps to.
map <- tibble(
  str = c("rolex", "mazda", "nike"),
  cat = c("watch", "car", "shoe")
)

# Example sentences to classify, one per row.
df <- tibble(
  data = c(
    "I love driving mazda",
    "He wears a rolex to work",
    "awesome nike shoe"
  )
)
示例表
> map
# A tibble: 3 x 2
str cat
<chr> <chr>
1 rolex watch
2 mazda car
3 nike shoe
> df
# A tibble: 3 x 1
data
<chr>
1 I love driving mazda
2 He wears a rolex to work
3 awesome nike shoe
期望的结果
> df
# A tibble: 3 x 2
data cat
<chr> <chr>
1 I love driving mazda car
2 He wears a rolex to work watch
3 awesome nike shoe shoe
我试过下面的方法,但它只替换了匹配到的子字符串,而没有按命名向量替换整个字符串。
# Named character vector: values are the categories, names are the keywords.
map_v <- setNames(as.character(map$cat), map$str)
实际得到的结果(不符合预期)
> df %>% mutate(cat = str_replace_all(data,map_v))
# A tibble: 3 x 2
data cat
<chr> <chr>
1 I love driving mazda I love driving car
2 He wears a rolex to work He wears a watch to work
3 awesome nike shoe awesome shoe shoe
library(purrr)
library(dplyr)
library(stringr)

# Add a `cat` column holding the category of the keyword found in each
# sentence. For every element of `data`, `str_detect()` tests the sentence
# against all patterns in `map$str`, and the matching entries of `map$cat`
# are selected.
#
# `map_chr()` requires exactly one string per element, so the selection is
# reduced with `[1]`: the first matching category is kept when several
# keywords match, and `NA` is returned when none do (indexing an empty
# character vector with 1 yields NA). Without this, a sentence with zero or
# multiple matches makes the whole call fail.
df %>%
  mutate(
    cat = map_chr(
      data,
      function(sentence) map$cat[str_detect(sentence, map$str)][1]
    )
  )
# # A tibble: 3 x 2
# data cat
# <chr> <chr>
# 1 I love driving mazda car
# 2 He wears a rolex to work watch
# 3 awesome nike shoe shoe
或者你可以使用 fuzzyjoin 包
library(fuzzyjoin)

# Join `df` to `map`, pairing the `data` column of `df` with the `str` column
# of `map` via regex matching; the result carries `data`, `str` and `cat`.
regex_inner_join(df, map, by = c(data = "str"))
# # A tibble: 3 x 3
# data str cat
# <chr> <chr> <chr>
# 1 I love driving mazda mazda car
# 2 He wears a rolex to work rolex watch
# 3 awesome nike shoe nike shoe
使用 outer + grepl + max.col 的基础 R 选项
# Logical matrix with one row per sentence and one column per keyword:
# hits[i, j] is TRUE when keyword j (used as a grepl pattern) occurs in
# sentence i.
hits <- t(outer(map$str, df$data, Vectorize(grepl)))

# Column index of the matching keyword for each sentence. The default
# ties.method of max.col() is "random", which would pick an arbitrary keyword
# when several match and an arbitrary one when none match; "first" makes the
# choice deterministic.
best <- max.col(hits, ties.method = "first")

# Rows with no TRUE at all have no matching keyword: report NA instead of a
# spurious category.
best[rowSums(hits) == 0] <- NA

df$cat <- map$cat[best]
结果为
> df
# A tibble: 3 x 2
data cat
<chr> <chr>
1 I love driving mazda car
2 He wears a rolex to work watch
3 awesome nike shoe shoe
我想用命名向量做部分字符串匹配:只要字符串中包含向量的某个名称,就用该名称对应的值替换整个字符串。
感谢您的帮助。
# Packages used by the question's snippets.
library(tidyr)
library(stringr)

# Lookup table: `str` is the keyword to search for, `cat` is the category
# that keyword maps to.
map <- tibble(
  str = c("rolex", "mazda", "nike"),
  cat = c("watch", "car", "shoe")
)

# Example sentences to classify, one per row.
df <- tibble(
  data = c(
    "I love driving mazda",
    "He wears a rolex to work",
    "awesome nike shoe"
  )
)
示例表
> map
# A tibble: 3 x 2
str cat
<chr> <chr>
1 rolex watch
2 mazda car
3 nike shoe
> df
# A tibble: 3 x 1
data
<chr>
1 I love driving mazda
2 He wears a rolex to work
3 awesome nike shoe
期望的结果
> df
# A tibble: 3 x 2
data cat
<chr> <chr>
1 I love driving mazda car
2 He wears a rolex to work watch
3 awesome nike shoe shoe
我试过下面的方法,但它只替换了匹配到的子字符串,而没有按命名向量替换整个字符串。
# Named character vector: values are the categories, names are the keywords.
map_v <- setNames(as.character(map$cat), map$str)
实际得到的结果(不符合预期)
> df %>% mutate(cat = str_replace_all(data,map_v))
# A tibble: 3 x 2
data cat
<chr> <chr>
1 I love driving mazda I love driving car
2 He wears a rolex to work He wears a watch to work
3 awesome nike shoe awesome shoe shoe
library(purrr)
library(dplyr)
library(stringr)

# Add a `cat` column holding the category of the keyword found in each
# sentence. For every element of `data`, `str_detect()` tests the sentence
# against all patterns in `map$str`, and the matching entries of `map$cat`
# are selected.
#
# `map_chr()` requires exactly one string per element, so the selection is
# reduced with `[1]`: the first matching category is kept when several
# keywords match, and `NA` is returned when none do (indexing an empty
# character vector with 1 yields NA). Without this, a sentence with zero or
# multiple matches makes the whole call fail.
df %>%
  mutate(
    cat = map_chr(
      data,
      function(sentence) map$cat[str_detect(sentence, map$str)][1]
    )
  )
# # A tibble: 3 x 2
# data cat
# <chr> <chr>
# 1 I love driving mazda car
# 2 He wears a rolex to work watch
# 3 awesome nike shoe shoe
或者你可以使用 fuzzyjoin 包
library(fuzzyjoin)

# Join `df` to `map`, pairing the `data` column of `df` with the `str` column
# of `map` via regex matching; the result carries `data`, `str` and `cat`.
regex_inner_join(df, map, by = c(data = "str"))
# # A tibble: 3 x 3
# data str cat
# <chr> <chr> <chr>
# 1 I love driving mazda mazda car
# 2 He wears a rolex to work rolex watch
# 3 awesome nike shoe nike shoe
使用 outer + grepl + max.col 的基础 R 选项
# Logical matrix with one row per sentence and one column per keyword:
# hits[i, j] is TRUE when keyword j (used as a grepl pattern) occurs in
# sentence i.
hits <- t(outer(map$str, df$data, Vectorize(grepl)))

# Column index of the matching keyword for each sentence. The default
# ties.method of max.col() is "random", which would pick an arbitrary keyword
# when several match and an arbitrary one when none match; "first" makes the
# choice deterministic.
best <- max.col(hits, ties.method = "first")

# Rows with no TRUE at all have no matching keyword: report NA instead of a
# spurious category.
best[rowSums(hits) == 0] <- NA

df$cat <- map$cat[best]
结果为
> df
# A tibble: 3 x 2
data cat
<chr> <chr>
1 I love driving mazda car
2 He wears a rolex to work watch
3 awesome nike shoe shoe