在 R 中对银行对账单进行分组

R bank statement grouping

我想按零售商名称对银行对账单中的消费记录进行分组,这样就可以用 dplyr 函数分析得到的数据框。下面的方法使用了自定义函数,可以正常工作,但我很想知道是否有更高效的做法。例如,有没有哪个包可以基于数据框列之间的复杂匹配逻辑来连接数据框?

# Debugging aid: step through FindRetailer() on its next call. The guard keeps
# a fresh top-to-bottom run from failing, because FindRetailer is only defined
# further down and debug() errors on an object that does not exist yet.
if (exists("FindRetailer", mode = "function")) {
  debug(FindRetailer)
}

#' Find the first known retailer mentioned in a purchase description.
#'
#' Matching is case-insensitive: the description and each retailer name are
#' upper-cased, then the name is searched for as a regular expression.
#'
#' @param Purchase A single purchase description (length-one character).
#' @param Retailers Character vector of retailer names, tried in order.
#'   Defaults to the global `RetailerNames`, looked up at call time.
#' @return The first matching retailer in title case (via `str_to_title()`),
#'   or "Donno" when no retailer matches.
FindRetailer <- function(Purchase, Retailers = RetailerNames) {
  stopifnot(length(Purchase) == 1L)
  P <- toupper(Purchase)
  # seq_along() gives an empty loop for zero retailers; 1:length() would
  # iterate over c(1, 0) and then fail on the missing name.
  for (z in seq_along(Retailers)) {
    Retailer <- toupper(Retailers[z])
    # isTRUE() skips an NA match result instead of erroring inside `if`.
    if (isTRUE(grepl(Retailer, P))) {
      return(str_to_title(Retailer))
    }
  }
  "Donno"
}

# Sample bank statement: one row per purchase, holding the raw description
# text and the amount spent.
Statement <- data.frame(
  Purchase = c(
    "abc Aldi xyz",
    "a Kmart bcd",
    "a STARBUCKS ghju",
    "abcd MacD efg"
  ),
  Amount = c(235, 23, 789, 45)
)

# Retailer names to look for inside the purchase descriptions.
RetailerNames <- c("Aldi", "Kmart", "Starbucks", "MacD")

# Desired outcome: the statement plus a Retailer column holding the matched
# retailer name in title case.
Result <- data.frame(
  Purchase = c(
    "abc Aldi xyz",
    "a KMART bcd",
    "a STARBUCKS mmm",
    "abcd MACD efg"
  ),
  Amount = c(235, 23, 789, 45),
  Retailer = c(
    "Aldi",
    "Kmart",
    "Starbucks",
    "Macd"
  )
)

# Tag every purchase with its retailer using the custom FindRetailer() helper.
# rowwise() makes mutate() call the scalar helper once per row; ungroup() then
# drops the row-wise grouping so later dplyr verbs (summarise(), count(), ...)
# operate on the whole table again instead of row by row.
NewStatment <- Statement %>%
  rowwise() %>%
  mutate(Retailer = FindRetailer(Purchase)) %>%
  ungroup()

# is this possible: join dataframes using complex string matching?
# this doesn't work yet
# NOTE(review): RetailerNames is a plain character vector at this point, not a
# data frame, and `by` is given an expression written as a string rather than
# column names -- left_join() presumably rejects both; confirm against the
# dplyr join documentation.
TestMethod<-Statement %>% 
  left_join(RetailerNames,by="Statement.Purchase %in% RetailerNames")


library(tidyverse)
library(glue) 
# Sample bank statement: one row per purchase, holding the raw description
# text and the amount spent.
Statement <- data.frame(
  Purchase = c(
    "abc Aldi xyz",
    "a Kmart bcd",
    "a STARBUCKS ghju",
    "abcd MacD efg"
  ),
  Amount = c(235, 23, 789, 45)
)

# Retailer names to look for inside the purchase descriptions.
RetailerNames <- c("Aldi", "Kmart", "Starbucks", "MacD")


# Build one alternation pattern ("Aldi|Kmart|...") from the retailer names and
# pull the first case-insensitive match out of each purchase description. The
# extracted text keeps the casing used in the statement (see output below).
# str_c(collapse = ) replaces the bare collapse() call: glue renamed that
# helper to glue_collapse(), and with tidyverse attached `collapse` can resolve
# to dplyr's unrelated generic instead.
Statement %>%
  mutate(
    Retailer = str_extract(
      Purchase,
      regex(str_c(RetailerNames, collapse = "|"), ignore_case = TRUE)
    )
  )
#>           Purchase Amount  Retailer
#> 1     abc Aldi xyz    235      Aldi
#> 2      a Kmart bcd     23     Kmart
#> 3 a STARBUCKS ghju    789 STARBUCKS
#> 4    abcd MacD efg     45      MacD

如果您想走 left_join 路线,可以尝试:

library(fuzzyjoin)

# Lookup table of retailer names; each name is used as a regular expression.
# tibble() replaces the deprecated data_frame() constructor.
RetailerNames <- tibble(Retailer = c("Aldi", "Kmart", "Starbucks", "MacD"))

# Keep every statement row and attach the retailer whose name matches the
# purchase text. ignore_case = TRUE is needed so that "a STARBUCKS ghju" still
# matches "Starbucks".
Statement %>%
  regex_left_join(
    RetailerNames,
    by = c(Purchase = "Retailer"),
    ignore_case = TRUE
  )