使用其他列中的值过滤列表列
Filtering list column with value in other column
我正在尝试使用包含在主 (?) 数据框的另一列中的值来过滤嵌套的 dataframe/list 列。不幸的是,我下面的代码不起作用,我认为问题在于我在 map_if
中引用 search_term
的方式不正确。我怀疑它需要整个 vector/column,而不仅仅是 search_term
的每一行的单个值。我使用 map_if 检查我要过滤的嵌套嵌套数据框是否不为空(检查 nested_data_nrow
中的行数)。感谢任何提示。非常感谢。低于我的代表。
library(tidyverse)
#> Warning: package 'dplyr' was built under R version 4.1.3
df_nest <- structure(list(nested_data = list(structure(list(places_bank_name = c(
"ZiraatBank",
"UniCredit Bank", "Sparkasse Bank", "Intesa Sanpaolo Banka",
"Raiffeisen BANK", "EKI"
)), row.names = c(NA, -6L), class = c(
"tbl_df",
"tbl", "data.frame"
)), structure(list(places_bank_name = c(
"Raiffeisen BANK (iPoslovnica)",
"KIB Banka", "Raiffeisen BANK - Agencija Bužim", "BBI Bosna Bank International"
)), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame")), structure(list(places_bank_name = "Casopis Financing"), row.names = c(
NA,
-1L
), class = c("tbl_df", "tbl", "data.frame")), structure(list(
places_bank_name = "UniCredit Bank"
), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"))), nested_data_nrow = c(
6,
4, 1, 1
), search_term = c(
"BBI|Bosna Banka", "Komercijalno-investiciona|KIB",
"Raiffeisen", "UniCredit"
), bank_name = c(
"Bosna Banka International",
"Komercijalno-investiciona banka d.d. V.Kladusa", "Raiffeisen Bank d.d. BiH",
"UniCredit d.d. Mostar"
)), row.names = c(NA, -4L), class = c(
"tbl_df",
"tbl", "data.frame"
))
df_nest
#> # A tibble: 4 x 4
#> nested_data nested_data_nrow search_term bank_name
#> <list> <dbl> <chr> <chr>
#> 1 <tibble [6 x 1]> 6 BBI|Bosna Banka Bosna Banka I~
#> 2 <tibble [4 x 1]> 4 Komercijalno-investiciona|KIB Komercijalno-~
#> 3 <tibble [1 x 1]> 1 Raiffeisen Raiffeisen Ba~
#> 4 <tibble [1 x 1]> 1 UniCredit UniCredit d.d~
df_nest_filtered <- df_nest %>%
mutate(nest_data_filtered = map_if(
nested_data,
nested_data_nrow > 0,
~ .x %>%
filter(str_detect(
places_bank_name,
regex(search_term, ignore_case = T)
))
)) %>%
mutate(nest_data_filtered_nrow = map_dbl(nest_data_filtered, nrow)) %>%
select(bank_name, search_term, everything())
#> Warning in stri_detect_regex(string, pattern, negate = negate, opts_regex =
#> opts(pattern)): longer object length is not a multiple of shorter object length
#> Error in `mutate()`:
#> ! Problem while computing `nest_data_filtered = map_if(...)`.
#> Caused by error in `filter()`:
#> ! Problem while computing `..1 = str_detect(places_bank_name,
#> regex(search_term, ignore_case = T))`.
#> x Input `..1` must be of size 1, not size 4.
由 reprex package (v2.0.1)
于 2022-04-13 创建
map_if
中有多个列,这会造成长度问题,因为 nested_data
的每个元素都应与对应的 'search_term' 相匹配。因此,pmap
会更好
library(dplyr)
library(purrr)
library(stringr)
df_nest2 <- df_nest %>%
mutate(nest_data_filtered = pmap(across(
c(nested_data, nested_data_nrow, search_term)),
~ if(..2 > 0) ..1 %>%
filter(str_detect(places_bank_name,
regex(..3, ignore_case = TRUE)) ) else ..1))
-输出
> df_nest2$search_term
[1] "BBI|Bosna Banka"
[2] "Komercijalno-investiciona|KIB"
[3] "Raiffeisen"
[4] "UniCredit"
> df_nest2$nest_data_filtered
[[1]]
# A tibble: 0 × 1
# … with 1 variable: places_bank_name <chr>
[[2]]
# A tibble: 1 × 1
places_bank_name
<chr>
1 KIB Banka
[[3]]
# A tibble: 0 × 1
# … with 1 variable: places_bank_name <chr>
[[4]]
# A tibble: 1 × 1
places_bank_name
<chr>
1 UniCredit Bank
或者另一个选项是 map2
和 case_when
df_nest %>%
mutate(nest_data_filtered = case_when(nested_data_nrow > 0
~ map2(nested_data, search_term,
~ .x %>% filter(str_detect(places_bank_name,
regex(.y, ignore_case = TRUE)))), TRUE ~ nested_data))
我正在尝试使用包含在主 (?) 数据框的另一列中的值来过滤嵌套的 dataframe/list 列。不幸的是,我下面的代码不起作用,我认为问题在于我在 map_if
中引用 search_term
的方式不正确。我怀疑它需要整个 vector/column,而不仅仅是 search_term
的每一行的单个值。我使用 map_if 检查我要过滤的嵌套嵌套数据框是否不为空(检查 nested_data_nrow
中的行数)。感谢任何提示。非常感谢。低于我的代表。
library(tidyverse)
#> Warning: package 'dplyr' was built under R version 4.1.3
df_nest <- structure(list(nested_data = list(structure(list(places_bank_name = c(
"ZiraatBank",
"UniCredit Bank", "Sparkasse Bank", "Intesa Sanpaolo Banka",
"Raiffeisen BANK", "EKI"
)), row.names = c(NA, -6L), class = c(
"tbl_df",
"tbl", "data.frame"
)), structure(list(places_bank_name = c(
"Raiffeisen BANK (iPoslovnica)",
"KIB Banka", "Raiffeisen BANK - Agencija Bužim", "BBI Bosna Bank International"
)), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame")), structure(list(places_bank_name = "Casopis Financing"), row.names = c(
NA,
-1L
), class = c("tbl_df", "tbl", "data.frame")), structure(list(
places_bank_name = "UniCredit Bank"
), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"))), nested_data_nrow = c(
6,
4, 1, 1
), search_term = c(
"BBI|Bosna Banka", "Komercijalno-investiciona|KIB",
"Raiffeisen", "UniCredit"
), bank_name = c(
"Bosna Banka International",
"Komercijalno-investiciona banka d.d. V.Kladusa", "Raiffeisen Bank d.d. BiH",
"UniCredit d.d. Mostar"
)), row.names = c(NA, -4L), class = c(
"tbl_df",
"tbl", "data.frame"
))
df_nest
#> # A tibble: 4 x 4
#> nested_data nested_data_nrow search_term bank_name
#> <list> <dbl> <chr> <chr>
#> 1 <tibble [6 x 1]> 6 BBI|Bosna Banka Bosna Banka I~
#> 2 <tibble [4 x 1]> 4 Komercijalno-investiciona|KIB Komercijalno-~
#> 3 <tibble [1 x 1]> 1 Raiffeisen Raiffeisen Ba~
#> 4 <tibble [1 x 1]> 1 UniCredit UniCredit d.d~
df_nest_filtered <- df_nest %>%
mutate(nest_data_filtered = map_if(
nested_data,
nested_data_nrow > 0,
~ .x %>%
filter(str_detect(
places_bank_name,
regex(search_term, ignore_case = T)
))
)) %>%
mutate(nest_data_filtered_nrow = map_dbl(nest_data_filtered, nrow)) %>%
select(bank_name, search_term, everything())
#> Warning in stri_detect_regex(string, pattern, negate = negate, opts_regex =
#> opts(pattern)): longer object length is not a multiple of shorter object length
#> Error in `mutate()`:
#> ! Problem while computing `nest_data_filtered = map_if(...)`.
#> Caused by error in `filter()`:
#> ! Problem while computing `..1 = str_detect(places_bank_name,
#> regex(search_term, ignore_case = T))`.
#> x Input `..1` must be of size 1, not size 4.
由 reprex package (v2.0.1)
于 2022-04-13 创建map_if
中有多个列,这会造成长度问题,因为 nested_data
的每个元素都应与对应的 'search_term' 相匹配。因此,pmap
会更好
library(dplyr)
library(purrr)
library(stringr)
df_nest2 <- df_nest %>%
mutate(nest_data_filtered = pmap(across(
c(nested_data, nested_data_nrow, search_term)),
~ if(..2 > 0) ..1 %>%
filter(str_detect(places_bank_name,
regex(..3, ignore_case = TRUE)) ) else ..1))
-输出
> df_nest2$search_term
[1] "BBI|Bosna Banka"
[2] "Komercijalno-investiciona|KIB"
[3] "Raiffeisen"
[4] "UniCredit"
> df_nest2$nest_data_filtered
[[1]]
# A tibble: 0 × 1
# … with 1 variable: places_bank_name <chr>
[[2]]
# A tibble: 1 × 1
places_bank_name
<chr>
1 KIB Banka
[[3]]
# A tibble: 0 × 1
# … with 1 variable: places_bank_name <chr>
[[4]]
# A tibble: 1 × 1
places_bank_name
<chr>
1 UniCredit Bank
或者另一个选项是 map2
和 case_when
df_nest %>%
mutate(nest_data_filtered = case_when(nested_data_nrow > 0
~ map2(nested_data, search_term,
~ .x %>% filter(str_detect(places_bank_name,
regex(.y, ignore_case = TRUE)))), TRUE ~ nested_data))