使用 R 创建一个函数来过滤和汇总
Make a function to filter and summarize using R
我有以下两个表(table):
<A>              <B>
a1     a2        b1
ABC    CAFE      AB
ABD    DRINK     BF
ABF    CAFE      ..
ABFF   DRINK
..     ..
我想针对表 B 中的每个字符串,对表 A 中 a1 包含该字符串的行按 a2 分组计数,像这样:
library(dplyr)
library(stringr)
# Rows of A whose a1 matches "AB", counted per value of a2.
A1 <- A %>%
  filter(str_detect(a1, "AB")) %>%
  group_by(a2) %>%
  summarize(n())

# Same summary for the pattern "BF"; only the pattern differs.
A2 <- A %>%
  filter(str_detect(a1, "BF")) %>%
  group_by(a2) %>%
  summarize(n())
但是,这样我就必须把同样的代码重复写很多遍,所以我想写一个函数,把表 B 中的值依次传入 str_detect……这个函数应该怎么写?
我想这解决了您的问题:
# For each pattern in B$b1: keep the rows of A whose a1 matches it,
# then count the remaining rows per value of a2.
lapply(B$b1, function(pattern) {
  matched <- filter(A, str_detect(a1, pattern))
  summarize(group_by(matched, a2), n())
})
这里我设计了一个函数 `count_fun`,它有四个参数:`dat` 是数据框(例如 `A`),`Scol` 是字符串列,`Gcol` 是分组列,`String` 是用于匹配的字符串。关于如何用 dplyr 编写函数,请参阅 https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html 。
设计函数
library(dplyr)
library(stringr)
#' Count rows per group among the rows whose string column matches a pattern
#'
#' @param dat A data frame.
#' @param Scol Unquoted name of the column of `dat` tested with `str_detect()`.
#' @param Gcol Unquoted name of the column of `dat` to group by.
#' @param String Pattern handed to `str_detect()`.
#' @return The summary of the matching rows: one row per value of `Gcol`,
#'   with the row count in a column named `n()`.
count_fun <- function(dat, Scol, Gcol, String) {
  # `{{ }}` forwards the caller's unquoted column names into the dplyr verbs.
  matched <- filter(dat, str_detect({{ Scol }}, String))
  grouped <- group_by(matched, {{ Gcol }})
  summarize(grouped, n())
}
# Count rows per a2 among the rows of A whose a1 matches "AB".
count_fun(dat = A, Scol = a1, Gcol = a2, String = "AB")
# # A tibble: 2 x 2
# a2 `n()`
# <chr> <int>
# 1 CAFE 2
# 2 DRINK 2

# Same call with the pattern "BF".
count_fun(dat = A, Scol = a1, Gcol = a2, String = "BF")
# # A tibble: 2 x 2
# a2 `n()`
# <chr> <int>
# 1 CAFE 1
# 2 DRINK 1
然后我们可以使用 `lapply` 遍历表 B(`B$b1`)中的每个元素,并对其应用 `count_fun`。
# Run count_fun once per pattern in B$b1; the result is a list with one
# summary per pattern, in the same order as B$b1.
lapply(B$b1, function(pattern) count_fun(A, a1, a2, String = pattern))
# [[1]]
# # A tibble: 2 x 2
# a2 `n()`
# <chr> <int>
# 1 CAFE 2
# 2 DRINK 2
#
# [[2]]
# # A tibble: 2 x 2
# a2 `n()`
# <chr> <int>
# 1 CAFE 1
# 2 DRINK 1
数据
# Example table A: a1 holds the strings to search, a2 the grouping labels.
A <- data.frame(
  a1 = c("ABC", "ABD", "ABF", "ABFF"),
  a2 = c("CAFE", "DRINK", "CAFE", "DRINK"),
  stringsAsFactors = FALSE
)

# Example table B: b1 holds the patterns looked up in A$a1.
B <- data.frame(
  b1 = c("AB", "BF"),
  stringsAsFactors = FALSE
)
我有以下两个表(table):
<A>              <B>
a1     a2        b1
ABC    CAFE      AB
ABD    DRINK     BF
ABF    CAFE      ..
ABFF   DRINK
..     ..
我想针对表 B 中的每个字符串,对表 A 中 a1 包含该字符串的行按 a2 分组计数,像这样:
library(dplyr)
library(stringr)
# Rows of A whose a1 matches "AB", counted per value of a2.
A1 <- A %>%
  filter(str_detect(a1, "AB")) %>%
  group_by(a2) %>%
  summarize(n())

# Same summary for the pattern "BF"; only the pattern differs.
A2 <- A %>%
  filter(str_detect(a1, "BF")) %>%
  group_by(a2) %>%
  summarize(n())
但是,这样我就必须把同样的代码重复写很多遍,所以我想写一个函数,把表 B 中的值依次传入 str_detect……这个函数应该怎么写?
我想这解决了您的问题:
# For each pattern in B$b1: keep the rows of A whose a1 matches it,
# then count the remaining rows per value of a2.
lapply(B$b1, function(pattern) {
  matched <- filter(A, str_detect(a1, pattern))
  summarize(group_by(matched, a2), n())
})
这里我设计了一个函数 `count_fun`,它有四个参数:`dat` 是数据框(例如 `A`),`Scol` 是字符串列,`Gcol` 是分组列,`String` 是用于匹配的字符串。关于如何用 dplyr 编写函数,请参阅 https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html 。
library(dplyr)
library(stringr)
#' Count rows per group among the rows whose string column matches a pattern
#'
#' @param dat A data frame.
#' @param Scol Unquoted name of the column of `dat` tested with `str_detect()`.
#' @param Gcol Unquoted name of the column of `dat` to group by.
#' @param String Pattern handed to `str_detect()`.
#' @return The summary of the matching rows: one row per value of `Gcol`,
#'   with the row count in a column named `n()`.
count_fun <- function(dat, Scol, Gcol, String) {
  # `{{ }}` forwards the caller's unquoted column names into the dplyr verbs.
  matched <- filter(dat, str_detect({{ Scol }}, String))
  grouped <- group_by(matched, {{ Gcol }})
  summarize(grouped, n())
}
# Count rows per a2 among the rows of A whose a1 matches "AB".
count_fun(dat = A, Scol = a1, Gcol = a2, String = "AB")
# # A tibble: 2 x 2
# a2 `n()`
# <chr> <int>
# 1 CAFE 2
# 2 DRINK 2

# Same call with the pattern "BF".
count_fun(dat = A, Scol = a1, Gcol = a2, String = "BF")
# # A tibble: 2 x 2
# a2 `n()`
# <chr> <int>
# 1 CAFE 1
# 2 DRINK 1
然后我们可以使用 `lapply` 遍历表 B(`B$b1`)中的每个元素,并对其应用 `count_fun`。
# Run count_fun once per pattern in B$b1; the result is a list with one
# summary per pattern, in the same order as B$b1.
lapply(B$b1, function(pattern) count_fun(A, a1, a2, String = pattern))
# [[1]]
# # A tibble: 2 x 2
# a2 `n()`
# <chr> <int>
# 1 CAFE 2
# 2 DRINK 2
#
# [[2]]
# # A tibble: 2 x 2
# a2 `n()`
# <chr> <int>
# 1 CAFE 1
# 2 DRINK 1
数据
# Example table A: a1 holds the strings to search, a2 the grouping labels.
A <- data.frame(
  a1 = c("ABC", "ABD", "ABF", "ABFF"),
  a2 = c("CAFE", "DRINK", "CAFE", "DRINK"),
  stringsAsFactors = FALSE
)

# Example table B: b1 holds the patterns looked up in A$a1.
B <- data.frame(
  b1 = c("AB", "BF"),
  stringsAsFactors = FALSE
)