使用 dplyr filter() 进行过滤——使用关系运算符
filtering with dplyr's filter() – using relational operators
我卡住了。如何使用 dplyr 的 filter() 同时将两个关系运算符作为组内过滤器?
我得到了什么
数据,
# install.packages(c("tidyverse"), dependencies = TRUE)
library(tibble)
# Example data: 21 rows with an integer key `id1`, a character key `id2`, and
# an integer series `x`. The object is hand-built as a `grouped_df`: its
# grouping on (id1, id2) is stored directly in attributes (`vars`, `indices`,
# `group_sizes`, `labels`) instead of being created with group_by().
# `indices` holds 0-based row positions for the three groups
# (1, "x_02"), (2, "x_02") and (2, "x_03").
# NOTE(review): this attribute layout looks like an older dplyr dput() format;
# confirm the installed dplyr version still accepts it.
tbl <- structure(list(id1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), id2 = c("x_02", "x_02", "x_02", "x_02", "x_02", "x_02",
"x_02", "x_02", "x_02", "x_02", "x_02", "x_02", "x_02", "x_03", "x_03", "x_03",
"x_03", "x_03", "x_03", "x_03", "x_03"), x = c(-4L, -3L, -2L, -1L, 1L, 2L, 3L,
4L, 5L, -2L, -1L, 1L, 2L, -2L, -1L, 1L, 2L, 3L, 4L, 5L, 6L)),
class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -21L),
vars = c("id1", "id2"), drop = TRUE, .Names = c("id1", "id2", "x"),
indices = list(0:8, 9:12, 13:20), group_sizes = c(9L, 4L, 8L),
biggest_group_size = 9L, labels = structure(list(id1 = c(1L, 2L, 2L),
id2 = c("x_02", "x_02", "x_03")), class = "data.frame", row.names = c(NA, -3L),
vars = c("id1", "id2"), drop = TRUE, .Names = c("id1", "id2")))
tbl
#> # A tibble: 21 x 3
#> # Groups: id1, id2 [3]
#> id1 id2 x
#> <int> <chr> <int>
#> 1 1 x_02 -4
#> 2 1 x_02 -3
#> 3 1 x_02 -2
#> 4 1 x_02 -1
#> 5 1 x_02 1
#> 6 1 x_02 2
#> 7 1 x_02 3
#> 8 1 x_02 4
#> 9 1 x_02 5
#> 10 2 x_02 -2
#> # ... with 11 more rows
简而言之,我想在 id1、id2 的每个分组内查找,并找到以 x < -2 开始、以 x > 2 结束的 x 序列(下面的期望结果比我在这里描述的更能说明问题)。
在某种程度上,我将其视为两个过滤器的组合,即我想要这个过滤器,
library(dplyr)
# Keep only the last row of each (id1, id2) group, and only when its x is
# greater than 2. Comma-separated filter() conditions are combined with `&`.
tbl %>%
  group_by(id1, id2) %>%
  filter(row_number() == n(), x > 2)
#> # A tibble: 2 x 3
#> # Groups: id1, id2 [2]
#> id1 id2 x
#> <int> <chr> <int>
#> 1 1 x_02 5
#> 2 2 x_03 6
要与此过滤器结合使用,
# Keep only the first row of each (id1, id2) group, and only when its x is
# less than -2. Comma-separated filter() conditions are combined with `&`.
tbl %>%
  group_by(id1, id2) %>%
  filter(row_number() == 1, x < -2)
#> # A tibble: 1 x 3
#> # Groups: id1, id2 [1]
#> id1 id2 x
#> <int> <chr> <int>
#> 1 1 x_02 -4
我想是这样的,但这并没有给我任何数据。
# This returns zero rows: filter() evaluates the condition row by row, and `&`
# demands that one single row be the last row with x > 2 AND the first row
# with x < -2. No value of x is both > 2 and < -2, so no row can pass.
# The two conditions describe different rows of a group and therefore cannot
# be combined with a row-wise `&`.
tbl %>% group_by(id1, id2) %>%
filter( (row_number() == n() & x > 2 ) &
(row_number() == 1 & x < -2 ) )
为什么这个不返回任何数据?
我想要得到的/想要的结果
# A tibble: 2 x 3
# Groups: id1, id2 [1]
id1 id2 x
<int> <chr> <int>
1 1 x_02 -4
1 1 x_02 5
当一个组内两个条件都满足时,该组应该有 2 行,所以再次过滤 n() == 2
# Keep a group's first row when x < -2 and its last row when x > 2. A group
# that satisfies both ends is left with exactly two rows, so the second
# filter() drops groups where only one end matched.
# group_by() is stated explicitly so the result does not silently depend on
# `tbl` already being grouped by (id1, id2).
tbl %>%
  group_by(id1, id2) %>%
  filter((row_number() == n() & x > 2) | (row_number() == 1 & x < -2)) %>%
  filter(n() == 2)
# A tibble: 2 x 3
# Groups: id1, id2 [1]
# id1 id2 x
# <int> <chr> <int>
# 1 1 x_02 -4
# 2 1 x_02 5
试试其他的(见评论):
# Return every row of the (id1, id2) groups whose first x is below -2 and
# whose last x is above 2. first()/last() yield one value per group, so a
# single grouped filter() keeps or drops whole groups directly; there is no
# need to extract the matching keys and join them back onto tbl.
# group_by() is stated explicitly so the result does not depend on `tbl`
# already being grouped.
tbl %>%
  group_by(id1, id2) %>%
  filter(first(x) < -2, last(x) > 2)
我卡住了。如何使用 dplyr 的 filter() 同时将两个关系运算符作为组内过滤器?
我得到了什么
数据,
# install.packages(c("tidyverse"), dependencies = TRUE)
library(tibble)
# Example data: 21 rows with an integer key `id1`, a character key `id2`, and
# an integer series `x`. The object is hand-built as a `grouped_df`: its
# grouping on (id1, id2) is stored directly in attributes (`vars`, `indices`,
# `group_sizes`, `labels`) instead of being created with group_by().
# `indices` holds 0-based row positions for the three groups
# (1, "x_02"), (2, "x_02") and (2, "x_03").
# NOTE(review): this attribute layout looks like an older dplyr dput() format;
# confirm the installed dplyr version still accepts it.
tbl <- structure(list(id1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), id2 = c("x_02", "x_02", "x_02", "x_02", "x_02", "x_02",
"x_02", "x_02", "x_02", "x_02", "x_02", "x_02", "x_02", "x_03", "x_03", "x_03",
"x_03", "x_03", "x_03", "x_03", "x_03"), x = c(-4L, -3L, -2L, -1L, 1L, 2L, 3L,
4L, 5L, -2L, -1L, 1L, 2L, -2L, -1L, 1L, 2L, 3L, 4L, 5L, 6L)),
class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -21L),
vars = c("id1", "id2"), drop = TRUE, .Names = c("id1", "id2", "x"),
indices = list(0:8, 9:12, 13:20), group_sizes = c(9L, 4L, 8L),
biggest_group_size = 9L, labels = structure(list(id1 = c(1L, 2L, 2L),
id2 = c("x_02", "x_02", "x_03")), class = "data.frame", row.names = c(NA, -3L),
vars = c("id1", "id2"), drop = TRUE, .Names = c("id1", "id2")))
tbl
#> # A tibble: 21 x 3
#> # Groups: id1, id2 [3]
#> id1 id2 x
#> <int> <chr> <int>
#> 1 1 x_02 -4
#> 2 1 x_02 -3
#> 3 1 x_02 -2
#> 4 1 x_02 -1
#> 5 1 x_02 1
#> 6 1 x_02 2
#> 7 1 x_02 3
#> 8 1 x_02 4
#> 9 1 x_02 5
#> 10 2 x_02 -2
#> # ... with 11 more rows
简而言之,我想在 id1、id2 的每个分组内查找,并找到以 x < -2 开始、以 x > 2 结束的 x 序列(下面的期望结果比我在这里描述的更能说明问题)。
在某种程度上,我将其视为两个过滤器的组合,即我想要这个过滤器,
library(dplyr)
# Keep only the last row of each (id1, id2) group, and only when its x is
# greater than 2. Comma-separated filter() conditions are combined with `&`.
tbl %>%
  group_by(id1, id2) %>%
  filter(row_number() == n(), x > 2)
#> # A tibble: 2 x 3
#> # Groups: id1, id2 [2]
#> id1 id2 x
#> <int> <chr> <int>
#> 1 1 x_02 5
#> 2 2 x_03 6
要与此过滤器结合使用,
# Keep only the first row of each (id1, id2) group, and only when its x is
# less than -2. Comma-separated filter() conditions are combined with `&`.
tbl %>%
  group_by(id1, id2) %>%
  filter(row_number() == 1, x < -2)
#> # A tibble: 1 x 3
#> # Groups: id1, id2 [1]
#> id1 id2 x
#> <int> <chr> <int>
#> 1 1 x_02 -4
我想是这样的,但这并没有给我任何数据。
# This returns zero rows: filter() evaluates the condition row by row, and `&`
# demands that one single row be the last row with x > 2 AND the first row
# with x < -2. No value of x is both > 2 and < -2, so no row can pass.
# The two conditions describe different rows of a group and therefore cannot
# be combined with a row-wise `&`.
tbl %>% group_by(id1, id2) %>%
filter( (row_number() == n() & x > 2 ) &
(row_number() == 1 & x < -2 ) )
为什么这个不返回任何数据?
我想要得到的/想要的结果
# A tibble: 2 x 3
# Groups: id1, id2 [1]
id1 id2 x
<int> <chr> <int>
1 1 x_02 -4
1 1 x_02 5
当一个组内两个条件都满足时,该组应该有 2 行,所以再次过滤 n() == 2
# Keep a group's first row when x < -2 and its last row when x > 2. A group
# that satisfies both ends is left with exactly two rows, so the second
# filter() drops groups where only one end matched.
# group_by() is stated explicitly so the result does not silently depend on
# `tbl` already being grouped by (id1, id2).
tbl %>%
  group_by(id1, id2) %>%
  filter((row_number() == n() & x > 2) | (row_number() == 1 & x < -2)) %>%
  filter(n() == 2)
# A tibble: 2 x 3
# Groups: id1, id2 [1]
# id1 id2 x
# <int> <chr> <int>
# 1 1 x_02 -4
# 2 1 x_02 5
试试其他的(见评论):
# Return every row of the (id1, id2) groups whose first x is below -2 and
# whose last x is above 2. first()/last() yield one value per group, so a
# single grouped filter() keeps or drops whole groups directly; there is no
# need to extract the matching keys and join them back onto tbl.
# group_by() is stated explicitly so the result does not depend on `tbl`
# already being grouped.
tbl %>%
  group_by(id1, id2) %>%
  filter(first(x) < -2, last(x) > 2)