使用 dplyr 动态创建列

Create columns dynamically with dplyr

我有一个包含 8 位数字的列,我想从中创建另外 8 个列。每列必须包含n个数字,范围从1到8。到目前为止,我已经成功完成了以下操作:

consolidado = tablas %>% 
        bind_rows() %>% 
        arrange(fecha) %>% 
        clean_names() %>% 
        mutate(n1 = str_extract(codigo, '[0-9]{1}'),
               n2 = str_extract(codigo, '[0-9]{2}'),
               n3 = str_extract(codigo, '[0-9]{3}'),
               n4 = str_extract(codigo, '[0-9]{4}'),
               n5 = str_extract(codigo, '[0-9]{5}'),
               n6 = str_extract(codigo, '[0-9]{6}'),
               n7 = str_extract(codigo, '[0-9]{7}'),
               n8 = str_extract(codigo, '[0-9]{8}'))

在 mutate 中创建的每一列都包含所需数量的数字。但是,我想知道是否有更短、更优雅的方法来解决这个问题。

这是数据集前 10 行的一些示例数据:

structure(list(concepto = c("ACTIVO", "ACTIVO", "ACTIVO", "ACTIVO", 
"ACTIVO", "ACTIVO", "ACTIVO", "ACTIVO", "ACTIVO", "ACTIVO"), 
    fecha = structure(c(13879, 13879, 13879, 13879, 13879, 13879, 
    13879, 13879, 13879, 13879), class = "Date"), codigo = c("10000000", 
    "10000000", "10000000", "10000000", "10000000", "10000000", 
    "10000000", "10000000", "10000000", "10000000"), bancos = c("BCR   Banco de Costa Rica", 
    "BNCR   Banco Nacional de Costa Rica", "BANHVI   Banco Hipotecario de la Vivienda", 
    "POPULAR   Banco Popular y de desarrollo Comunal", "BANCO BAC SAN JOSE S A", 
    "BANCO BCT S A", "Banco Cathay de Costa Rica S A", "Banco Davivienda Costa Rica Sociedad Anonima", 
    "BANCO GENERAL COSTA RICA SOCIEDAD ANONIMA", "BANCO IMPROSA S A"
    ), valor = c(1675930811215, 2745357417558, 57759616119, 1119228752031, 
    658336496744, 71173584265, 24596619019, 469100562766, 6068169547, 
    202528077603), n1 = c("1", "1", "1", "1", "1", "1", "1", 
    "1", "1", "1"), n2 = c("10", "10", "10", "10", "10", "10", 
    "10", "10", "10", "10"), n3 = c("100", "100", "100", "100", 
    "100", "100", "100", "100", "100", "100"), n4 = c("1000", 
    "1000", "1000", "1000", "1000", "1000", "1000", "1000", "1000", 
    "1000"), n5 = c("10000", "10000", "10000", "10000", "10000", 
    "10000", "10000", "10000", "10000", "10000"), n6 = c("100000", 
    "100000", "100000", "100000", "100000", "100000", "100000", 
    "100000", "100000", "100000"), n7 = c("1000000", "1000000", 
    "1000000", "1000000", "1000000", "1000000", "1000000", "1000000", 
    "1000000", "1000000"), n8 = c("10000000", "10000000", "10000000", 
    "10000000", "10000000", "10000000", "10000000", "10000000", 
    "10000000", "10000000")), row.names = c(NA, -10L), class = c("tbl_df", 
"tbl", "data.frame"))

在此先感谢您提供的任何帮助!

我们可以使用 map_dfcset_names:

library(tidyverse)

consolidado %>% 
  mutate(map_dfc(set_names(1:8, paste0("n", 1:8)),
             ~ str_extract(codigo, paste0('[0-9]{',.x,'}'))))

#> # A tibble: 10 × 13
#>    concepto fecha      codigo bancos   valor n1    n2    n3    n4    n5    n6   
#>    <chr>    <date>     <chr>  <chr>    <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
#>  1 ACTIVO   2008-01-01 10000… BCR  … 1.68e12 1     10    100   1000  10000 1000…
#>  2 ACTIVO   2008-01-01 10000… BNCR … 2.75e12 1     10    100   1000  10000 1000…
#>  3 ACTIVO   2008-01-01 10000… BANHV… 5.78e10 1     10    100   1000  10000 1000…
#>  4 ACTIVO   2008-01-01 10000… POPUL… 1.12e12 1     10    100   1000  10000 1000…
#>  5 ACTIVO   2008-01-01 10000… BANCO… 6.58e11 1     10    100   1000  10000 1000…
#>  6 ACTIVO   2008-01-01 10000… BANCO… 7.12e10 1     10    100   1000  10000 1000…
#>  7 ACTIVO   2008-01-01 10000… Banco… 2.46e10 1     10    100   1000  10000 1000…
#>  8 ACTIVO   2008-01-01 10000… Banco… 4.69e11 1     10    100   1000  10000 1000…
#>  9 ACTIVO   2008-01-01 10000… BANCO… 6.07e 9 1     10    100   1000  10000 1000…
#> 10 ACTIVO   2008-01-01 10000… BANCO… 2.03e11 1     10    100   1000  10000 1000…
#> # … with 2 more variables: n7 <chr>, n8 <chr>

reprex package (v0.3.0)

于 2022-04-30 创建

另一个选项是 dplyover::over(免责声明:我是软件包维护者)。这使语法更具可读性:

library(tidyverse)
library(dplyover) # https://timteafan.github.io/dplyover/

consolidado %>% 
  mutate(over(1:8,
              ~ str_extract(codigo, paste0('[0-9]{',.x,'}')),
              .names = "n{x}"))

#> # A tibble: 10 × 13
#>    concepto fecha      codigo bancos   valor n1    n2    n3    n4    n5    n6   
#>    <chr>    <date>     <chr>  <chr>    <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
#>  1 ACTIVO   2008-01-01 10000… BCR  … 1.68e12 1     10    100   1000  10000 1000…
#>  2 ACTIVO   2008-01-01 10000… BNCR … 2.75e12 1     10    100   1000  10000 1000…
#>  3 ACTIVO   2008-01-01 10000… BANHV… 5.78e10 1     10    100   1000  10000 1000…
#>  4 ACTIVO   2008-01-01 10000… POPUL… 1.12e12 1     10    100   1000  10000 1000…
#>  5 ACTIVO   2008-01-01 10000… BANCO… 6.58e11 1     10    100   1000  10000 1000…
#>  6 ACTIVO   2008-01-01 10000… BANCO… 7.12e10 1     10    100   1000  10000 1000…
#>  7 ACTIVO   2008-01-01 10000… Banco… 2.46e10 1     10    100   1000  10000 1000…
#>  8 ACTIVO   2008-01-01 10000… Banco… 4.69e11 1     10    100   1000  10000 1000…
#>  9 ACTIVO   2008-01-01 10000… BANCO… 6.07e 9 1     10    100   1000  10000 1000…
#> 10 ACTIVO   2008-01-01 10000… BANCO… 2.03e11 1     10    100   1000  10000 1000…
#> # … with 2 more variables: n7 <chr>, n8 <chr>

reprex package (v0.3.0)

于 2022-04-30 创建