Apply across rows in dbplyr (postgresql). Getting "Error: Unknown input type: pairlist"
Apply across rows in dbplyr (postgresql). Getting "Error: Unknown input type: pairlist"
我想筛选出三列中至少有一列的值不是空字符串("")的行。
我的代码在 data.frame(或 tibble)上可以正常运行,但在 DBI 数据库表上运行时会报错:
Error: Unknown input type: pairlist
library(tidyverse)
library(magrittr)
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
#> The following object is masked from 'package:tidyr':
#>
#> extract
# Build a reproducible 20-row example tibble. Each of the five character
# columns holds, per row, either the column's own name or the empty string "".
# The seed is fixed so the sampled values are the same on every run.
set.seed(1234)
df <- tibble(
  gt_varscan = sample(c("gt_varscan", ""), 20, replace = TRUE),
  gt_gatk = sample(c("gt_gatk", ""), 20, replace = TRUE),
  gt_samtools = sample(c("gt_samtools", ""), 20, replace = TRUE),
  gt_freebayes = sample(c("gt_freebayes", ""), 20, replace = TRUE),
  gt_vardict = sample(c("gt_vardict", ""), 20, replace = TRUE)
)
# Print the tibble so the reprex shows the input data.
df
#> # A tibble: 20 x 5
#> gt_varscan gt_gatk gt_samtools gt_freebayes gt_vardict
#> <chr> <chr> <chr> <chr> <chr>
#> 1 "" "" "" "" "gt_vardict"
#> 2 "" "" "gt_samtools" "" ""
#> 3 "" "" "" "gt_freebayes" "gt_vardict"
#> 4 "" "" "" "" ""
#> 5 "gt_varscan" "" "" "gt_freebayes" "gt_vardict"
#> 6 "" "" "gt_samtools" "gt_freebayes" ""
#> 7 "gt_varscan" "gt_gatk" "gt_samtools" "" "gt_vardict"
#> 8 "gt_varscan" "" "gt_samtools" "gt_freebayes" ""
#> 9 "gt_varscan" "" "gt_samtools" "gt_freebayes" ""
#> 10 "" "" "" "" "gt_vardict"
#> 11 "" "gt_gatk" "" "" "gt_vardict"
#> 12 "" "" "" "gt_freebayes" "gt_vardict"
#> 13 "" "gt_gatk" "" "gt_freebayes" "gt_vardict"
#> 14 "gt_varscan" "gt_gatk" "" "gt_freebayes" ""
#> 15 "" "gt_gatk" "gt_samtools" "" "gt_vardict"
#> 16 "" "" "" "" ""
#> 17 "" "gt_gatk" "gt_samtools" "" ""
#> 18 "gt_varscan" "" "gt_samtools" "gt_freebayes" "gt_vardict"
#> 19 "" "" "" "" ""
#> 20 "" "gt_gatk" "gt_samtools" "gt_freebayes" ""
# Names of the three columns to test: gt_gatk, gt_samtools, gt_freebayes.
col_list <- paste0("gt_", c("gatk", "samtools", "freebayes"))
# Keep the rows in which at least one of those columns is not the empty string.
# apply(..., 1, ...) evaluates the predicate once per row of the selection.
df %>%
  filter(apply(select(., !!col_list), 1, function(row) any(row != "")))
#> # A tibble: 15 x 5
#> gt_varscan gt_gatk gt_samtools gt_freebayes gt_vardict
#> <chr> <chr> <chr> <chr> <chr>
#> 1 "" "" "gt_samtools" "" ""
#> 2 "" "" "" "gt_freebayes" "gt_vardict"
#> 3 "gt_varscan" "" "" "gt_freebayes" "gt_vardict"
#> 4 "" "" "gt_samtools" "gt_freebayes" ""
#> 5 "gt_varscan" "gt_gatk" "gt_samtools" "" "gt_vardict"
#> 6 "gt_varscan" "" "gt_samtools" "gt_freebayes" ""
#> 7 "gt_varscan" "" "gt_samtools" "gt_freebayes" ""
#> 8 "" "gt_gatk" "" "" "gt_vardict"
#> 9 "" "" "" "gt_freebayes" "gt_vardict"
#> 10 "" "gt_gatk" "" "gt_freebayes" "gt_vardict"
#> 11 "gt_varscan" "gt_gatk" "" "gt_freebayes" ""
#> 12 "" "gt_gatk" "gt_samtools" "" "gt_vardict"
#> 13 "" "gt_gatk" "gt_samtools" "" ""
#> 14 "gt_varscan" "" "gt_samtools" "gt_freebayes" "gt_vardict"
#> 15 "" "gt_gatk" "gt_samtools" "gt_freebayes" ""
由 reprex package (v0.3.0) 于 2021 年 1 月 15 日创建
library(DBI)
library(tidyverse)
library(dbplyr)
#>
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#>
#> ident, sql
library(bigrquery)
library(magrittr)
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
#> The following object is masked from 'package:tidyr':
#>
#> extract
# Authenticate with the given account, then open a DBI connection through the
# bigrquery driver to dataset "test_dataset" in the project returned by
# bq_test_project().
bq_auth(email = "ariel.balter@gmail.com")
bq_conn <- DBI::dbConnect(
  bigrquery::bigquery(),
  project = bq_test_project(),
  dataset = "test_dataset"
)
# Build a reproducible 20-row example tibble. Each of the five character
# columns holds, per row, either the column's own name or the empty string "".
# The seed is fixed so the sampled values are the same on every run.
set.seed(1234)
df <- tibble(
  gt_varscan = sample(c("gt_varscan", ""), 20, replace = TRUE),
  gt_gatk = sample(c("gt_gatk", ""), 20, replace = TRUE),
  gt_samtools = sample(c("gt_samtools", ""), 20, replace = TRUE),
  gt_freebayes = sample(c("gt_freebayes", ""), 20, replace = TRUE),
  gt_vardict = sample(c("gt_vardict", ""), 20, replace = TRUE)
)
# Write the local tibble to the connection as "test_table", overwriting any
# existing table of that name.
dbWriteTable(
  bq_conn,
  "test_table",
  df,
  overwrite = TRUE
)
# Create a table reference on the connection and fetch its rows back into R
# so the reprex shows what was stored.
df_bq <- tbl(bq_conn, "test_table")
df_bq %>% collect()
#> # A tibble: 20 x 5
#> gt_samtools gt_gatk gt_vardict gt_freebayes gt_varscan
#> <chr> <chr> <chr> <chr> <chr>
#> 1 "gt_samtools" "" "" "" ""
#> 2 "" "" "" "" ""
#> 3 "gt_samtools" "" "" "gt_freebayes" ""
#> 4 "gt_samtools" "" "" "gt_freebayes" "gt_varscan"
#> 5 "gt_samtools" "" "" "gt_freebayes" "gt_varscan"
#> 6 "" "" "" "" ""
#> 7 "" "" "" "" ""
#> 8 "" "" "gt_vardict" "" ""
#> 9 "" "" "gt_vardict" "gt_freebayes" ""
#> 10 "" "" "gt_vardict" "gt_freebayes" "gt_varscan"
#> 11 "" "" "gt_vardict" "" ""
#> 12 "" "" "gt_vardict" "gt_freebayes" ""
#> 13 "gt_samtools" "" "gt_vardict" "gt_freebayes" "gt_varscan"
#> 14 "" "gt_gatk" "" "gt_freebayes" "gt_varscan"
#> 15 "gt_samtools" "gt_gatk" "" "" ""
#> 16 "gt_samtools" "gt_gatk" "" "gt_freebayes" ""
#> 17 "gt_samtools" "gt_gatk" "gt_vardict" "" "gt_varscan"
#> 18 "" "gt_gatk" "gt_vardict" "" ""
#> 19 "" "gt_gatk" "gt_vardict" "gt_freebayes" ""
#> 20 "gt_samtools" "gt_gatk" "gt_vardict" "" ""
# Build the three column names (gt_gatk, gt_samtools, gt_freebayes) and run the
# row-wise apply() filter against the database-backed table df_bq.
# This is the call that produces the "Unknown input type: pairlist" error shown
# below.
# NOTE(review): presumably the apply()/anonymous-function expression cannot be
# translated for the database backend -- confirm against the dbplyr docs.
col_list = c("gatk", "samtools", "freebayes") %>% paste0("gt_", .)
df_bq %>%
filter(apply(select(., !!col_list), 1, function(x) any(x!="")))
#> Error: Unknown input type: pairlist
由 reprex package (v0.3.0) 于 2021 年 1 月 15 日创建
事实证明,下面列出的三种写法中只有 #3 适用于 DBI 表。
library(dplyr)
# Keep the rows where any column named in col_list is not the empty string.
df %>% filter_at(col_list, any_vars(. != ''))
以下三种写法在数据框(data.frame)上都能正常运行。
#1.
# Row-wise apply() over the columns in col_list; keeps rows with any value
# that is not the empty string.
df %>% filter(apply(select(., all_of(col_list)), 1, function(x) any( x != "")))
#2.
# across() builds one `!= ''` comparison per column in col_list, and Reduce()
# combines them with `|` into a single logical vector.
df %>% filter(Reduce(`|`, across(all_of(col_list), ~.x != '')))
#3.
# filter_at() with any_vars(): keeps rows where any column in col_list is
# not ''.
df %>% filter_at(col_list, any_vars(. != ''))
我想筛选出三列中至少有一列的值不是空字符串("")的行。
我的代码在 data.frame(或 tibble)上可以正常运行,但在 DBI 数据库表上运行时会报错:
Error: Unknown input type: pairlist
library(tidyverse)
library(magrittr)
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
#> The following object is masked from 'package:tidyr':
#>
#> extract
# Build a reproducible 20-row example tibble. Each of the five character
# columns holds, per row, either the column's own name or the empty string "".
# The seed is fixed so the sampled values are the same on every run.
set.seed(1234)
df <- tibble(
  gt_varscan = sample(c("gt_varscan", ""), 20, replace = TRUE),
  gt_gatk = sample(c("gt_gatk", ""), 20, replace = TRUE),
  gt_samtools = sample(c("gt_samtools", ""), 20, replace = TRUE),
  gt_freebayes = sample(c("gt_freebayes", ""), 20, replace = TRUE),
  gt_vardict = sample(c("gt_vardict", ""), 20, replace = TRUE)
)
# Print the tibble so the reprex shows the input data.
df
#> # A tibble: 20 x 5
#> gt_varscan gt_gatk gt_samtools gt_freebayes gt_vardict
#> <chr> <chr> <chr> <chr> <chr>
#> 1 "" "" "" "" "gt_vardict"
#> 2 "" "" "gt_samtools" "" ""
#> 3 "" "" "" "gt_freebayes" "gt_vardict"
#> 4 "" "" "" "" ""
#> 5 "gt_varscan" "" "" "gt_freebayes" "gt_vardict"
#> 6 "" "" "gt_samtools" "gt_freebayes" ""
#> 7 "gt_varscan" "gt_gatk" "gt_samtools" "" "gt_vardict"
#> 8 "gt_varscan" "" "gt_samtools" "gt_freebayes" ""
#> 9 "gt_varscan" "" "gt_samtools" "gt_freebayes" ""
#> 10 "" "" "" "" "gt_vardict"
#> 11 "" "gt_gatk" "" "" "gt_vardict"
#> 12 "" "" "" "gt_freebayes" "gt_vardict"
#> 13 "" "gt_gatk" "" "gt_freebayes" "gt_vardict"
#> 14 "gt_varscan" "gt_gatk" "" "gt_freebayes" ""
#> 15 "" "gt_gatk" "gt_samtools" "" "gt_vardict"
#> 16 "" "" "" "" ""
#> 17 "" "gt_gatk" "gt_samtools" "" ""
#> 18 "gt_varscan" "" "gt_samtools" "gt_freebayes" "gt_vardict"
#> 19 "" "" "" "" ""
#> 20 "" "gt_gatk" "gt_samtools" "gt_freebayes" ""
# Names of the three columns to test: gt_gatk, gt_samtools, gt_freebayes.
col_list <- paste0("gt_", c("gatk", "samtools", "freebayes"))
# Keep the rows in which at least one of those columns is not the empty string.
# apply(..., 1, ...) evaluates the predicate once per row of the selection.
df %>%
  filter(apply(select(., !!col_list), 1, function(row) any(row != "")))
#> # A tibble: 15 x 5
#> gt_varscan gt_gatk gt_samtools gt_freebayes gt_vardict
#> <chr> <chr> <chr> <chr> <chr>
#> 1 "" "" "gt_samtools" "" ""
#> 2 "" "" "" "gt_freebayes" "gt_vardict"
#> 3 "gt_varscan" "" "" "gt_freebayes" "gt_vardict"
#> 4 "" "" "gt_samtools" "gt_freebayes" ""
#> 5 "gt_varscan" "gt_gatk" "gt_samtools" "" "gt_vardict"
#> 6 "gt_varscan" "" "gt_samtools" "gt_freebayes" ""
#> 7 "gt_varscan" "" "gt_samtools" "gt_freebayes" ""
#> 8 "" "gt_gatk" "" "" "gt_vardict"
#> 9 "" "" "" "gt_freebayes" "gt_vardict"
#> 10 "" "gt_gatk" "" "gt_freebayes" "gt_vardict"
#> 11 "gt_varscan" "gt_gatk" "" "gt_freebayes" ""
#> 12 "" "gt_gatk" "gt_samtools" "" "gt_vardict"
#> 13 "" "gt_gatk" "gt_samtools" "" ""
#> 14 "gt_varscan" "" "gt_samtools" "gt_freebayes" "gt_vardict"
#> 15 "" "gt_gatk" "gt_samtools" "gt_freebayes" ""
由 reprex package (v0.3.0) 于 2021 年 1 月 15 日创建
library(DBI)
library(tidyverse)
library(dbplyr)
#>
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#>
#> ident, sql
library(bigrquery)
library(magrittr)
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
#> The following object is masked from 'package:tidyr':
#>
#> extract
# Authenticate with the given account, then open a DBI connection through the
# bigrquery driver to dataset "test_dataset" in the project returned by
# bq_test_project().
bq_auth(email = "ariel.balter@gmail.com")
bq_conn <- DBI::dbConnect(
  bigrquery::bigquery(),
  project = bq_test_project(),
  dataset = "test_dataset"
)
# Build a reproducible 20-row example tibble. Each of the five character
# columns holds, per row, either the column's own name or the empty string "".
# The seed is fixed so the sampled values are the same on every run.
set.seed(1234)
df <- tibble(
  gt_varscan = sample(c("gt_varscan", ""), 20, replace = TRUE),
  gt_gatk = sample(c("gt_gatk", ""), 20, replace = TRUE),
  gt_samtools = sample(c("gt_samtools", ""), 20, replace = TRUE),
  gt_freebayes = sample(c("gt_freebayes", ""), 20, replace = TRUE),
  gt_vardict = sample(c("gt_vardict", ""), 20, replace = TRUE)
)
# Write the local tibble to the connection as "test_table", overwriting any
# existing table of that name.
dbWriteTable(
  bq_conn,
  "test_table",
  df,
  overwrite = TRUE
)
# Create a table reference on the connection and fetch its rows back into R
# so the reprex shows what was stored.
df_bq <- tbl(bq_conn, "test_table")
df_bq %>% collect()
#> # A tibble: 20 x 5
#> gt_samtools gt_gatk gt_vardict gt_freebayes gt_varscan
#> <chr> <chr> <chr> <chr> <chr>
#> 1 "gt_samtools" "" "" "" ""
#> 2 "" "" "" "" ""
#> 3 "gt_samtools" "" "" "gt_freebayes" ""
#> 4 "gt_samtools" "" "" "gt_freebayes" "gt_varscan"
#> 5 "gt_samtools" "" "" "gt_freebayes" "gt_varscan"
#> 6 "" "" "" "" ""
#> 7 "" "" "" "" ""
#> 8 "" "" "gt_vardict" "" ""
#> 9 "" "" "gt_vardict" "gt_freebayes" ""
#> 10 "" "" "gt_vardict" "gt_freebayes" "gt_varscan"
#> 11 "" "" "gt_vardict" "" ""
#> 12 "" "" "gt_vardict" "gt_freebayes" ""
#> 13 "gt_samtools" "" "gt_vardict" "gt_freebayes" "gt_varscan"
#> 14 "" "gt_gatk" "" "gt_freebayes" "gt_varscan"
#> 15 "gt_samtools" "gt_gatk" "" "" ""
#> 16 "gt_samtools" "gt_gatk" "" "gt_freebayes" ""
#> 17 "gt_samtools" "gt_gatk" "gt_vardict" "" "gt_varscan"
#> 18 "" "gt_gatk" "gt_vardict" "" ""
#> 19 "" "gt_gatk" "gt_vardict" "gt_freebayes" ""
#> 20 "gt_samtools" "gt_gatk" "gt_vardict" "" ""
# Build the three column names (gt_gatk, gt_samtools, gt_freebayes) and run the
# row-wise apply() filter against the database-backed table df_bq.
# This is the call that produces the "Unknown input type: pairlist" error shown
# below.
# NOTE(review): presumably the apply()/anonymous-function expression cannot be
# translated for the database backend -- confirm against the dbplyr docs.
col_list = c("gatk", "samtools", "freebayes") %>% paste0("gt_", .)
df_bq %>%
filter(apply(select(., !!col_list), 1, function(x) any(x!="")))
#> Error: Unknown input type: pairlist
由 reprex package (v0.3.0) 于 2021 年 1 月 15 日创建
事实证明,下面列出的三种写法中只有 #3 适用于 DBI 表。
library(dplyr)
# Keep the rows where any column named in col_list is not the empty string.
df %>% filter_at(col_list, any_vars(. != ''))
以下三种写法在数据框(data.frame)上都能正常运行。
#1.
# Row-wise apply() over the columns in col_list; keeps rows with any value
# that is not the empty string.
df %>% filter(apply(select(., all_of(col_list)), 1, function(x) any( x != "")))
#2.
# across() builds one `!= ''` comparison per column in col_list, and Reduce()
# combines them with `|` into a single logical vector.
df %>% filter(Reduce(`|`, across(all_of(col_list), ~.x != '')))
#3.
# filter_at() with any_vars(): keeps rows where any column in col_list is
# not ''.
df %>% filter_at(col_list, any_vars(. != ''))