Apply across rows in dbplyr (postgresql). Getting "Error: Unknown input type: pairlist"

Apply across rows in dbplyr (postgresql). Getting "Error: Unknown input type: pairlist"

我想筛选出指定的三列中至少有一列的值不是空字符串("")的行。

我的代码在 data.frame(或 tibble)上可以正常运行,但在通过 DBI 连接的数据库表上运行时会报错:

Error: Unknown input type: pairlist

library(tidyverse)
library(magrittr)
#> 
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#> 
#>     set_names
#> The following object is masked from 'package:tidyr':
#> 
#>     extract

# Fix the RNG seed so the sampled example data is reproducible.
set.seed(1234)

# Example data: 20 rows, five character columns. Each cell holds either the
# column's own name or the empty string "".
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df <-
  tibble(
    gt_varscan = sample(c("gt_varscan", ""), 20, replace = TRUE),
    gt_gatk = sample(c("gt_gatk", ""), 20, replace = TRUE),
    gt_samtools = sample(c("gt_samtools", ""), 20, replace = TRUE),
    gt_freebayes = sample(c("gt_freebayes", ""), 20, replace = TRUE),
    gt_vardict = sample(c("gt_vardict", ""), 20, replace = TRUE)
  )

df
#> # A tibble: 20 x 5
#>    gt_varscan   gt_gatk   gt_samtools   gt_freebayes   gt_vardict  
#>    <chr>        <chr>     <chr>         <chr>          <chr>       
#>  1 ""           ""        ""            ""             "gt_vardict"
#>  2 ""           ""        "gt_samtools" ""             ""          
#>  3 ""           ""        ""            "gt_freebayes" "gt_vardict"
#>  4 ""           ""        ""            ""             ""          
#>  5 "gt_varscan" ""        ""            "gt_freebayes" "gt_vardict"
#>  6 ""           ""        "gt_samtools" "gt_freebayes" ""          
#>  7 "gt_varscan" "gt_gatk" "gt_samtools" ""             "gt_vardict"
#>  8 "gt_varscan" ""        "gt_samtools" "gt_freebayes" ""          
#>  9 "gt_varscan" ""        "gt_samtools" "gt_freebayes" ""          
#> 10 ""           ""        ""            ""             "gt_vardict"
#> 11 ""           "gt_gatk" ""            ""             "gt_vardict"
#> 12 ""           ""        ""            "gt_freebayes" "gt_vardict"
#> 13 ""           "gt_gatk" ""            "gt_freebayes" "gt_vardict"
#> 14 "gt_varscan" "gt_gatk" ""            "gt_freebayes" ""          
#> 15 ""           "gt_gatk" "gt_samtools" ""             "gt_vardict"
#> 16 ""           ""        ""            ""             ""          
#> 17 ""           "gt_gatk" "gt_samtools" ""             ""          
#> 18 "gt_varscan" ""        "gt_samtools" "gt_freebayes" "gt_vardict"
#> 19 ""           ""        ""            ""             ""          
#> 20 ""           "gt_gatk" "gt_samtools" "gt_freebayes" ""

# Names of the three columns to test for a non-empty value.
col_list <- paste0("gt_", c("gatk", "samtools", "freebayes"))

# Local (in-memory) filter: walk over the rows of the selected columns and
# keep a row when at least one of its values differs from "".
df %>%
  filter(
    apply(
      select(., all_of(col_list)),
      MARGIN = 1,
      FUN = function(row) any(row != "")
    )
  )
#> # A tibble: 15 x 5
#>    gt_varscan   gt_gatk   gt_samtools   gt_freebayes   gt_vardict  
#>    <chr>        <chr>     <chr>         <chr>          <chr>       
#>  1 ""           ""        "gt_samtools" ""             ""          
#>  2 ""           ""        ""            "gt_freebayes" "gt_vardict"
#>  3 "gt_varscan" ""        ""            "gt_freebayes" "gt_vardict"
#>  4 ""           ""        "gt_samtools" "gt_freebayes" ""          
#>  5 "gt_varscan" "gt_gatk" "gt_samtools" ""             "gt_vardict"
#>  6 "gt_varscan" ""        "gt_samtools" "gt_freebayes" ""          
#>  7 "gt_varscan" ""        "gt_samtools" "gt_freebayes" ""          
#>  8 ""           "gt_gatk" ""            ""             "gt_vardict"
#>  9 ""           ""        ""            "gt_freebayes" "gt_vardict"
#> 10 ""           "gt_gatk" ""            "gt_freebayes" "gt_vardict"
#> 11 "gt_varscan" "gt_gatk" ""            "gt_freebayes" ""          
#> 12 ""           "gt_gatk" "gt_samtools" ""             "gt_vardict"
#> 13 ""           "gt_gatk" "gt_samtools" ""             ""          
#> 14 "gt_varscan" ""        "gt_samtools" "gt_freebayes" "gt_vardict"
#> 15 ""           "gt_gatk" "gt_samtools" "gt_freebayes" ""

由 reprex package (v0.3.0) 于 2021 年 1 月 15 日创建

library(DBI)
library(tidyverse)
library(dbplyr)
#> 
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#> 
#>     ident, sql
library(bigrquery)
library(magrittr)
#> 
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#> 
#>     set_names
#> The following object is masked from 'package:tidyr':
#> 
#>     extract


# Authenticate bigrquery with the account used for this example.
bq_auth(email = "ariel.balter@gmail.com")

# Open a DBI connection to the "test_dataset" dataset in the project
# returned by bq_test_project().
bq_conn <- DBI::dbConnect(
  drv = bigrquery::bigquery(),
  project = bq_test_project(),
  dataset = "test_dataset"
)

# Fix the RNG seed so the sampled example data is reproducible.
set.seed(1234)

# Example data: 20 rows, five character columns. Each cell holds either the
# column's own name or the empty string "".
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df <-
  tibble(
    gt_varscan = sample(c("gt_varscan", ""), 20, replace = TRUE),
    gt_gatk = sample(c("gt_gatk", ""), 20, replace = TRUE),
    gt_samtools = sample(c("gt_samtools", ""), 20, replace = TRUE),
    gt_freebayes = sample(c("gt_freebayes", ""), 20, replace = TRUE),
    gt_vardict = sample(c("gt_vardict", ""), 20, replace = TRUE)
  )

# Upload the local tibble as "test_table", replacing any existing table of
# that name. `TRUE` is spelled out because `T` can be reassigned.
dbWriteTable(
  bq_conn,
  "test_table",
  df,
  overwrite = TRUE
)

# Reference the remote table through dbplyr, then pull its rows into R to
# show what was stored.
df_bq <- tbl(bq_conn, "test_table")
df_bq %>% collect()
#> # A tibble: 20 x 5
#>    gt_samtools   gt_gatk   gt_vardict   gt_freebayes   gt_varscan  
#>    <chr>         <chr>     <chr>        <chr>          <chr>       
#>  1 "gt_samtools" ""        ""           ""             ""          
#>  2 ""            ""        ""           ""             ""          
#>  3 "gt_samtools" ""        ""           "gt_freebayes" ""          
#>  4 "gt_samtools" ""        ""           "gt_freebayes" "gt_varscan"
#>  5 "gt_samtools" ""        ""           "gt_freebayes" "gt_varscan"
#>  6 ""            ""        ""           ""             ""          
#>  7 ""            ""        ""           ""             ""          
#>  8 ""            ""        "gt_vardict" ""             ""          
#>  9 ""            ""        "gt_vardict" "gt_freebayes" ""          
#> 10 ""            ""        "gt_vardict" "gt_freebayes" "gt_varscan"
#> 11 ""            ""        "gt_vardict" ""             ""          
#> 12 ""            ""        "gt_vardict" "gt_freebayes" ""          
#> 13 "gt_samtools" ""        "gt_vardict" "gt_freebayes" "gt_varscan"
#> 14 ""            "gt_gatk" ""           "gt_freebayes" "gt_varscan"
#> 15 "gt_samtools" "gt_gatk" ""           ""             ""          
#> 16 "gt_samtools" "gt_gatk" ""           "gt_freebayes" ""          
#> 17 "gt_samtools" "gt_gatk" "gt_vardict" ""             "gt_varscan"
#> 18 ""            "gt_gatk" "gt_vardict" ""             ""          
#> 19 ""            "gt_gatk" "gt_vardict" "gt_freebayes" ""          
#> 20 "gt_samtools" "gt_gatk" "gt_vardict" ""             ""

# Names of the three columns to test for a non-empty value.
col_list = c("gatk", "samtools", "freebayes") %>% paste0("gt_", .)

# Same row-wise filter as used on the local tibble, now applied to the remote
# table `df_bq`. This call is the one that fails with the error shown below.
# NOTE(review): presumably dbplyr cannot translate apply() with an anonymous
# R function into SQL, which would explain the "pairlist" error -- confirm.
df_bq %>%
  filter(apply(select(., !!col_list), 1, function(x) any(x!="")))
#> Error: Unknown input type: pairlist

由 reprex package (v0.3.0) 于 2021 年 1 月 15 日创建

结果发现,下面列出的三种写法中只有 #3 适用于 DBI 表:

library(dplyr)
# Keep rows where any column named in col_list is not the empty string.
filter_at(df, .vars = col_list, .vars_predicate = any_vars(. != ""))

以下三种写法在本地数据框(data.frame / tibble)上都能正常运行:

#1. Row-wise apply() over the selected columns: a row is kept when any of
#   its values differs from "".
df %>%
  filter(
    apply(
      select(., all_of(col_list)),
      MARGIN = 1,
      FUN = function(x) any(x != "")
    )
  )
#2. Build one logical column per selected column with across(), then OR
#   them together element-wise with Reduce().
df %>%
  filter(
    Reduce(f = `|`, x = across(all_of(col_list), ~ .x != ""))
  )
#3. Scoped filter: any_vars() keeps a row when the predicate holds for at
#   least one of the columns named in col_list.
df %>%
  filter_at(.vars = col_list, .vars_predicate = any_vars(. != ""))