R银行对帐单分组
R bank statement grouping
我想按零售商名称对消费记录进行分组,以此分析我的银行对账单,之后再用 dplyr 函数分析得到的数据框。
我下面的方法使用了自定义函数,可以正常工作,但我很想知道是否有更高效的做法。例如,是否有哪个包可以基于数据框列之间的复杂匹配逻辑(例如字符串模式匹配)来连接数据框?
# debug() can only flag a function that already exists, so calling it before
# the definition below fails in a fresh session. Run it interactively after
# sourcing this definition if step-through debugging is wanted.
# debug(FindRetailer)

#' Find the retailer mentioned in a purchase description
#'
#' Looks for each retailer name inside the purchase text, ignoring case, and
#' reports the first retailer (in the order given) that occurs in it.
#'
#' @param Purchase Character vector of purchase descriptions.
#' @param Retailers Character vector of retailer names to search for.
#'   Defaults to the global `RetailerNames`, which is what the original
#'   one-argument form of this function read.
#' @return A character vector the same length as `Purchase`: the title-cased
#'   retailer name for each description, or "Donno" when none is found.
FindRetailer <- function(Purchase, Retailers = RetailerNames) {
  purchase_upper <- toupper(Purchase)

  # Missing names cannot match anything; dropping them keeps the masks NA-free.
  retailers_upper <- toupper(Retailers[!is.na(Retailers)])

  found <- rep("Donno", length(purchase_upper))
  unresolved <- rep(TRUE, length(purchase_upper))

  # Iterating over the values (not 1:length()) is safe for an empty vector.
  for (retailer in retailers_upper) {
    # fixed = TRUE: names are literal text, not patterns. Upper-casing a regex
    # would corrupt escapes, and metacharacters such as "(" would error.
    hit <- unresolved & grepl(retailer, purchase_upper, fixed = TRUE)
    if (any(hit)) {
      found[hit] <- stringr::str_to_title(retailer)
      # First match wins, mirroring the early return of a scalar search.
      unresolved <- unresolved & !hit
    }
  }

  found
}
# Sample bank statement: one row per purchase, holding the raw description
# text and the amount spent.
Statement <- data.frame(
  Purchase = c(
    "abc Aldi xyz",
    "a Kmart bcd",
    "a STARBUCKS ghju",
    "abcd MacD efg"
  ),
  Amount = c(235, 23, 789, 45)
)

# Retailer names to look for inside each purchase description.
RetailerNames <- c("Aldi", "Kmart", "Starbucks", "MacD")
# what I need: the input statement plus a Retailer column holding the
# title-cased name of the retailer found in each purchase description.
# Purchase and Amount repeat the sample statement's values exactly, so this
# expected result can be compared directly with a computed one.
Result <- data.frame(
  Purchase = c(
    "abc Aldi xyz",
    "a Kmart bcd",
    "a STARBUCKS ghju",
    "abcd MacD efg"
  ),
  Amount = c(235, 23, 789, 45),
  Retailer = c("Aldi", "Kmart", "Starbucks", "Macd")
)
# this works using custom function
# rowwise() makes mutate() call FindRetailer() once per row. ungroup() then
# drops that row-wise grouping so later summarise()/mutate() calls work on
# the whole table instead of one row at a time.
NewStatment <- Statement %>%
  rowwise() %>%
  mutate(Retailer = FindRetailer(Purchase)) %>%
  ungroup()
# is this possible: join dataframes using complex string matching?
# this doesn't work yet
# NOTE(review): RetailerNames is a plain character vector at this point, not
# a data frame, and `by` is given a single string that reads like a logical
# expression. dplyr's left_join() expects `by` to name key columns and joins
# on equality, so it presumably cannot express "Purchase contains Retailer"
# -- confirm against the dplyr join documentation.
TestMethod<-Statement %>%
left_join(RetailerNames,by="Statement.Purchase %in% RetailerNames")
library(tidyverse)
library(glue)
# Sample bank statement: one row per purchase, holding the raw description
# text and the amount spent.
Statement <- data.frame(
  Purchase = c(
    "abc Aldi xyz",
    "a Kmart bcd",
    "a STARBUCKS ghju",
    "abcd MacD efg"
  ),
  Amount = c(235, 23, 789, 45)
)

# Retailer names to look for inside each purchase description.
RetailerNames <- c("Aldi", "Kmart", "Starbucks", "MacD")
# Build one case-insensitive alternation pattern ("Aldi|Kmart|...") from the
# retailer names and extract the first match from each purchase description.
# str_c(collapse = ) joins the names; glue's collapse() helper was deprecated
# and renamed glue_collapse(), and the bare name clashes with
# dplyr::collapse(). Purchases with no match get NA.
Statement %>%
  mutate(
    Retailer = Purchase %>%
      str_extract(
        RetailerNames %>%
          str_c(collapse = "|") %>%
          regex(ignore_case = TRUE)
      )
  )
#> Purchase Amount Retailer
#> 1 abc Aldi xyz 235 Aldi
#> 2 a Kmart bcd 23 Kmart
#> 3 a STARBUCKS ghju 789 STARBUCKS
#> 4 abcd MacD efg 45 MacD
如果您想采用 left_join 的思路,
可以试试下面的 fuzzyjoin 包:
library(fuzzyjoin)
# Lookup table of retailer names; each name is used as a regular expression
# matched against the Purchase text. tibble() replaces the deprecated
# data_frame() constructor.
RetailerNames <- tibble(Retailer = c("Aldi", "Kmart", "Starbucks", "MacD"))

# ignore_case = TRUE lets "Starbucks" match "a STARBUCKS ghju"; the regex join
# is case-sensitive by default, which would leave that row's Retailer as NA.
Statement %>%
  regex_left_join(
    RetailerNames,
    by = c(Purchase = "Retailer"),
    ignore_case = TRUE
  )
我想按零售商名称对消费记录进行分组,以此分析我的银行对账单,之后再用 dplyr 函数分析得到的数据框。
我下面的方法使用了自定义函数,可以正常工作,但我很想知道是否有更高效的做法。例如,是否有哪个包可以基于数据框列之间的复杂匹配逻辑(例如字符串模式匹配)来连接数据框?
# debug() can only flag a function that already exists, so calling it before
# the definition below fails in a fresh session. Run it interactively after
# sourcing this definition if step-through debugging is wanted.
# debug(FindRetailer)

#' Find the retailer mentioned in a purchase description
#'
#' Looks for each retailer name inside the purchase text, ignoring case, and
#' reports the first retailer (in the order given) that occurs in it.
#'
#' @param Purchase Character vector of purchase descriptions.
#' @param Retailers Character vector of retailer names to search for.
#'   Defaults to the global `RetailerNames`, which is what the original
#'   one-argument form of this function read.
#' @return A character vector the same length as `Purchase`: the title-cased
#'   retailer name for each description, or "Donno" when none is found.
FindRetailer <- function(Purchase, Retailers = RetailerNames) {
  purchase_upper <- toupper(Purchase)

  # Missing names cannot match anything; dropping them keeps the masks NA-free.
  retailers_upper <- toupper(Retailers[!is.na(Retailers)])

  found <- rep("Donno", length(purchase_upper))
  unresolved <- rep(TRUE, length(purchase_upper))

  # Iterating over the values (not 1:length()) is safe for an empty vector.
  for (retailer in retailers_upper) {
    # fixed = TRUE: names are literal text, not patterns. Upper-casing a regex
    # would corrupt escapes, and metacharacters such as "(" would error.
    hit <- unresolved & grepl(retailer, purchase_upper, fixed = TRUE)
    if (any(hit)) {
      found[hit] <- stringr::str_to_title(retailer)
      # First match wins, mirroring the early return of a scalar search.
      unresolved <- unresolved & !hit
    }
  }

  found
}
# Sample bank statement: one row per purchase, holding the raw description
# text and the amount spent.
Statement <- data.frame(
  Purchase = c(
    "abc Aldi xyz",
    "a Kmart bcd",
    "a STARBUCKS ghju",
    "abcd MacD efg"
  ),
  Amount = c(235, 23, 789, 45)
)

# Retailer names to look for inside each purchase description.
RetailerNames <- c("Aldi", "Kmart", "Starbucks", "MacD")
# what I need: the input statement plus a Retailer column holding the
# title-cased name of the retailer found in each purchase description.
# Purchase and Amount repeat the sample statement's values exactly, so this
# expected result can be compared directly with a computed one.
Result <- data.frame(
  Purchase = c(
    "abc Aldi xyz",
    "a Kmart bcd",
    "a STARBUCKS ghju",
    "abcd MacD efg"
  ),
  Amount = c(235, 23, 789, 45),
  Retailer = c("Aldi", "Kmart", "Starbucks", "Macd")
)
# this works using custom function
# rowwise() makes mutate() call FindRetailer() once per row. ungroup() then
# drops that row-wise grouping so later summarise()/mutate() calls work on
# the whole table instead of one row at a time.
NewStatment <- Statement %>%
  rowwise() %>%
  mutate(Retailer = FindRetailer(Purchase)) %>%
  ungroup()
# is this possible: join dataframes using complex string matching?
# this doesn't work yet
# NOTE(review): RetailerNames is a plain character vector at this point, not
# a data frame, and `by` is given a single string that reads like a logical
# expression. dplyr's left_join() expects `by` to name key columns and joins
# on equality, so it presumably cannot express "Purchase contains Retailer"
# -- confirm against the dplyr join documentation.
TestMethod<-Statement %>%
left_join(RetailerNames,by="Statement.Purchase %in% RetailerNames")
library(tidyverse)
library(glue)
# Sample bank statement: one row per purchase, holding the raw description
# text and the amount spent.
Statement <- data.frame(
  Purchase = c(
    "abc Aldi xyz",
    "a Kmart bcd",
    "a STARBUCKS ghju",
    "abcd MacD efg"
  ),
  Amount = c(235, 23, 789, 45)
)

# Retailer names to look for inside each purchase description.
RetailerNames <- c("Aldi", "Kmart", "Starbucks", "MacD")
# Build one case-insensitive alternation pattern ("Aldi|Kmart|...") from the
# retailer names and extract the first match from each purchase description.
# str_c(collapse = ) joins the names; glue's collapse() helper was deprecated
# and renamed glue_collapse(), and the bare name clashes with
# dplyr::collapse(). Purchases with no match get NA.
Statement %>%
  mutate(
    Retailer = Purchase %>%
      str_extract(
        RetailerNames %>%
          str_c(collapse = "|") %>%
          regex(ignore_case = TRUE)
      )
  )
#> Purchase Amount Retailer
#> 1 abc Aldi xyz 235 Aldi
#> 2 a Kmart bcd 23 Kmart
#> 3 a STARBUCKS ghju 789 STARBUCKS
#> 4 abcd MacD efg 45 MacD
如果您想采用 left_join 的思路,
可以试试下面的 fuzzyjoin 包:
library(fuzzyjoin)
# Lookup table of retailer names; each name is used as a regular expression
# matched against the Purchase text. tibble() replaces the deprecated
# data_frame() constructor.
RetailerNames <- tibble(Retailer = c("Aldi", "Kmart", "Starbucks", "MacD"))

# ignore_case = TRUE lets "Starbucks" match "a STARBUCKS ghju"; the regex join
# is case-sensitive by default, which would leave that row's Retailer as NA.
Statement %>%
  regex_left_join(
    RetailerNames,
    by = c(Purchase = "Retailer"),
    ignore_case = TRUE
  )