在 dplyr 管道循环中使用列名
Using column name in loop with dplyr pipes
我有一个简单的问题,但找不到答案。我将使用一个可重现的示例来解释我的问题:我有一个数据集(“aus_production”,tsibbledata
包的数据集),并且我想对每个变量运行 ARIMA —— "Beer"、"Tobacco"、"Bricks"、"Cement"、"Electricity" 和 "Gas"。
library(tidyverse)
library(tsibble)
library(fable)
library(tsibbledata)
# Importing dataset
aus_production <- tsibbledata::aus_production
# Running ARIMA for each variable
# NOTE: this is the asker's failing attempt, kept unchanged on purpose.
# The loop walks the column names from the second column onward, so `i` is a
# character string such as "Beer". ARIMA(i) therefore receives the string
# "Beer" instead of the bare column Beer, which produces an error.
for (i in colnames(aus_production)[2:length(aus_production)]){
fit <- aus_production %>% model(arima = ARIMA(i))
print(fit)
}
我的问题是:如何循环遍历列名(字符串),并一次将一个列放入 dplyr
管道中?问题在于 i = "Beer",而不是 Beer(不带引号)。
在循环内部,实际运行的代码是 fit <- aus_production %>% model(arima = ARIMA("Beer"))
,它给出了一个错误。正确的形式是 fit <- aus_production %>% model(arima = ARIMA(Beer))
(不带引号)。
我尝试了一些东西,比如:
fit <- aus_production %>% model(arima = ARIMA(aus_production[,i]))
或
fit <- aus_production %>% model(arima = ARIMA(aus_production$i))
我的问题并非来自 fable
包,我只是以它为例。
你可以用 pivot(先用 pivot_longer 转换为长格式)来做:
library(tidyverse)
library(tsibble)
#>
#> Attaching package: 'tsibble'
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, union
library(fable)
#> Loading required package: fabletools
library(tsibbledata)
library(broom)
# Fit one ARIMA per variable without a loop: reshape to long format so that
# every column except Quarter is stacked into a name column `var` and a value
# column `vals`.
out <- aus_production %>%
pivot_longer(-Quarter, names_to="var", values_to="vals") %>%
# One group per original variable, so model() fits a separate model for each.
group_by(var) %>%
# Same fixed specification for every series: pdq(0,1,1) and PDQ(0,1,1).
model(ARIMA(vals ~ 0 + pdq(0,1,1) + PDQ(0,1,1))) %>%
# Rename the two resulting columns to "var" and "model".
setNames(c("var", "model"))
# Print the resulting table of fitted models (one row per variable).
out
#> # A mable: 6 x 2
#> # Key: var [6]
#> var model
#> <chr> <model>
#> 1 Beer <ARIMA(0,1,1)(0,1,1)[4]>
#> 2 Bricks <ARIMA(0,1,1)(0,1,1)[4]>
#> 3 Cement <ARIMA(0,1,1)(0,1,1)[4]>
#> 4 Electricity <ARIMA(0,1,1)(0,1,1)[4]>
#> 5 Gas <ARIMA(0,1,1)(0,1,1)[4]>
#> 6 Tobacco <ARIMA(0,1,1)(0,1,1)[4]>
# Build one coefficient table covering all fitted models: process `out` one
# row (one model) at a time, keeping the variable name next to its tidy()
# result.
out %>%
rowwise %>%
summarise(var = first(var),
mod = tidy(model)) %>%
# `mod` holds a data frame per row; unnest() expands it into regular columns.
unnest(mod)
#> # A tibble: 12 × 6
#> var term estimate std.error statistic p.value
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Beer ma1 -0.741 0.0411 -18.1 7.89e-45
#> 2 Beer sma1 -0.695 0.0569 -12.2 2.47e-26
#> 3 Bricks ma1 0.147 0.0694 2.12 3.49e- 2
#> 4 Bricks sma1 -0.859 0.0381 -22.5 5.89e-56
#> 5 Cement ma1 -0.258 0.0633 -4.07 6.57e- 5
#> 6 Cement sma1 -0.832 0.0408 -20.4 6.57e-52
#> 7 Electricity ma1 -0.556 0.0771 -7.22 9.22e-12
#> 8 Electricity sma1 -0.731 0.0396 -18.5 4.57e-46
#> 9 Gas ma1 -0.311 0.0714 -4.35 2.09e- 5
#> 10 Gas sma1 -0.557 0.0501 -11.1 6.08e-23
#> 11 Tobacco ma1 -0.807 0.0576 -14.0 4.38e-31
#> 12 Tobacco sma1 -0.749 0.0606 -12.4 4.13e-26
由 reprex package (v2.0.1)
于 2022-05-30 创建
要回答您原来的问题,您可以在字符变量字符串周围使用 !!sym()
:
# Running ARIMA for each variable
# `i` is a column name held as a character string. sym(i) converts it to a
# symbol and `!!` injects that symbol into the formula, so ARIMA() sees the
# bare column (e.g. Beer) rather than the string "Beer".
for (i in colnames(aus_production)[2:length(aus_production)]){
fit <- aus_production %>% model(arima = ARIMA(!!sym(i) ~ 0 + pdq(1,1,1)))
# Tag the coefficient table with the variable name before printing, so each
# printed tibble can be matched to its series.
print(tidy(fit) %>% mutate(var = i))
}
#> # A tibble: 5 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 -0.337 0.0810 -4.16 4.55e- 5 Beer
#> 2 arima ma1 -0.597 0.0663 -9.00 1.26e-16 Beer
#> 3 arima sar1 -0.814 0.115 -7.08 2.00e-11 Beer
#> 4 arima sma1 0.194 0.101 1.91 5.74e- 2 Beer
#> 5 arima sma2 -0.678 0.0671 -10.1 6.70e-20 Beer
#> # A tibble: 5 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 0.264 0.0819 3.22 1.50e- 3 Tobacco
#> 2 arima ma1 -0.908 0.0379 -24.0 3.71e-59 Tobacco
#> 3 arima sar1 0.450 0.414 1.09 2.79e- 1 Tobacco
#> 4 arima sma1 -1.04 0.433 -2.40 1.73e- 2 Tobacco
#> 5 arima sma2 0.178 0.307 0.579 5.63e- 1 Tobacco
#> # A tibble: 5 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 0.293 0.323 0.907 0.366 Bricks
#> 2 arima ma1 -0.137 0.330 -0.415 0.678 Bricks
#> 3 arima sar1 -0.830 0.236 -3.51 0.000553 Bricks
#> 4 arima sma1 0.00262 0.220 0.0119 0.991 Bricks
#> 5 arima sma2 -0.742 0.184 -4.03 0.0000792 Bricks
#> # A tibble: 3 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 -0.239 0.228 -1.05 2.96e- 1 Cement
#> 2 arima ma1 -0.0382 0.232 -0.164 8.70e- 1 Cement
#> 3 arima sma1 -0.823 0.0426 -19.3 1.09e-48 Cement
#> # A tibble: 5 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 0.245 0.137 1.80 7.39e- 2 Electricity
#> 2 arima ma1 -0.739 0.107 -6.91 5.68e-11 Electricity
#> 3 arima sar1 0.893 0.0851 10.5 5.09e-21 Electricity
#> 4 arima sma1 -1.73 0.0941 -18.4 6.94e-46 Electricity
#> 5 arima sma2 0.791 0.0752 10.5 4.33e-21 Electricity
#> # A tibble: 3 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 0.700 0.0779 8.99 1.34e-16 Gas
#> 2 arima ma1 -0.936 0.0444 -21.1 4.67e-54 Gas
#> 3 arima sma1 -0.518 0.0586 -8.84 3.64e-16 Gas
由 reprex package (v2.0.1)
于 2022-05-30 创建
我有一个简单的问题,但找不到答案。我将使用一个可重现的示例来解释我的问题:我有一个数据集(“aus_production”,tsibbledata
包的数据集),并且我想对每个变量运行 ARIMA —— "Beer"、"Tobacco"、"Bricks"、"Cement"、"Electricity" 和 "Gas"。
library(tidyverse)
library(tsibble)
library(fable)
library(tsibbledata)
# Importing dataset
aus_production <- tsibbledata::aus_production
# Running ARIMA for each variable
# NOTE: this is the asker's failing attempt, kept unchanged on purpose.
# The loop walks the column names from the second column onward, so `i` is a
# character string such as "Beer". ARIMA(i) therefore receives the string
# "Beer" instead of the bare column Beer, which produces an error.
for (i in colnames(aus_production)[2:length(aus_production)]){
fit <- aus_production %>% model(arima = ARIMA(i))
print(fit)
}
我的问题是:如何循环遍历列名(字符串),并一次将一个列放入 dplyr
管道中?问题在于 i = "Beer",而不是 Beer(不带引号)。
在循环内部,实际运行的代码是 fit <- aus_production %>% model(arima = ARIMA("Beer"))
,它给出了一个错误。正确的形式是 fit <- aus_production %>% model(arima = ARIMA(Beer))
(不带引号)。
我尝试了一些东西,比如:
fit <- aus_production %>% model(arima = ARIMA(aus_production[,i]))
或
fit <- aus_production %>% model(arima = ARIMA(aus_production$i))
我的问题并非来自 fable
包,我只是以它为例。
你可以用 pivot(先用 pivot_longer 转换为长格式)来做:
library(tidyverse)
library(tsibble)
#>
#> Attaching package: 'tsibble'
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, union
library(fable)
#> Loading required package: fabletools
library(tsibbledata)
library(broom)
# Fit one ARIMA per variable without a loop: reshape to long format so that
# every column except Quarter is stacked into a name column `var` and a value
# column `vals`.
out <- aus_production %>%
pivot_longer(-Quarter, names_to="var", values_to="vals") %>%
# One group per original variable, so model() fits a separate model for each.
group_by(var) %>%
# Same fixed specification for every series: pdq(0,1,1) and PDQ(0,1,1).
model(ARIMA(vals ~ 0 + pdq(0,1,1) + PDQ(0,1,1))) %>%
# Rename the two resulting columns to "var" and "model".
setNames(c("var", "model"))
# Print the resulting table of fitted models (one row per variable).
out
#> # A mable: 6 x 2
#> # Key: var [6]
#> var model
#> <chr> <model>
#> 1 Beer <ARIMA(0,1,1)(0,1,1)[4]>
#> 2 Bricks <ARIMA(0,1,1)(0,1,1)[4]>
#> 3 Cement <ARIMA(0,1,1)(0,1,1)[4]>
#> 4 Electricity <ARIMA(0,1,1)(0,1,1)[4]>
#> 5 Gas <ARIMA(0,1,1)(0,1,1)[4]>
#> 6 Tobacco <ARIMA(0,1,1)(0,1,1)[4]>
# Build one coefficient table covering all fitted models: process `out` one
# row (one model) at a time, keeping the variable name next to its tidy()
# result.
out %>%
rowwise %>%
summarise(var = first(var),
mod = tidy(model)) %>%
# `mod` holds a data frame per row; unnest() expands it into regular columns.
unnest(mod)
#> # A tibble: 12 × 6
#> var term estimate std.error statistic p.value
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Beer ma1 -0.741 0.0411 -18.1 7.89e-45
#> 2 Beer sma1 -0.695 0.0569 -12.2 2.47e-26
#> 3 Bricks ma1 0.147 0.0694 2.12 3.49e- 2
#> 4 Bricks sma1 -0.859 0.0381 -22.5 5.89e-56
#> 5 Cement ma1 -0.258 0.0633 -4.07 6.57e- 5
#> 6 Cement sma1 -0.832 0.0408 -20.4 6.57e-52
#> 7 Electricity ma1 -0.556 0.0771 -7.22 9.22e-12
#> 8 Electricity sma1 -0.731 0.0396 -18.5 4.57e-46
#> 9 Gas ma1 -0.311 0.0714 -4.35 2.09e- 5
#> 10 Gas sma1 -0.557 0.0501 -11.1 6.08e-23
#> 11 Tobacco ma1 -0.807 0.0576 -14.0 4.38e-31
#> 12 Tobacco sma1 -0.749 0.0606 -12.4 4.13e-26
由 reprex package (v2.0.1)
于 2022-05-30 创建
要回答您原来的问题,您可以在字符变量字符串周围使用 !!sym()
:
# Running ARIMA for each variable
# `i` is a column name held as a character string. sym(i) converts it to a
# symbol and `!!` injects that symbol into the formula, so ARIMA() sees the
# bare column (e.g. Beer) rather than the string "Beer".
for (i in colnames(aus_production)[2:length(aus_production)]){
fit <- aus_production %>% model(arima = ARIMA(!!sym(i) ~ 0 + pdq(1,1,1)))
# Tag the coefficient table with the variable name before printing, so each
# printed tibble can be matched to its series.
print(tidy(fit) %>% mutate(var = i))
}
#> # A tibble: 5 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 -0.337 0.0810 -4.16 4.55e- 5 Beer
#> 2 arima ma1 -0.597 0.0663 -9.00 1.26e-16 Beer
#> 3 arima sar1 -0.814 0.115 -7.08 2.00e-11 Beer
#> 4 arima sma1 0.194 0.101 1.91 5.74e- 2 Beer
#> 5 arima sma2 -0.678 0.0671 -10.1 6.70e-20 Beer
#> # A tibble: 5 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 0.264 0.0819 3.22 1.50e- 3 Tobacco
#> 2 arima ma1 -0.908 0.0379 -24.0 3.71e-59 Tobacco
#> 3 arima sar1 0.450 0.414 1.09 2.79e- 1 Tobacco
#> 4 arima sma1 -1.04 0.433 -2.40 1.73e- 2 Tobacco
#> 5 arima sma2 0.178 0.307 0.579 5.63e- 1 Tobacco
#> # A tibble: 5 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 0.293 0.323 0.907 0.366 Bricks
#> 2 arima ma1 -0.137 0.330 -0.415 0.678 Bricks
#> 3 arima sar1 -0.830 0.236 -3.51 0.000553 Bricks
#> 4 arima sma1 0.00262 0.220 0.0119 0.991 Bricks
#> 5 arima sma2 -0.742 0.184 -4.03 0.0000792 Bricks
#> # A tibble: 3 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 -0.239 0.228 -1.05 2.96e- 1 Cement
#> 2 arima ma1 -0.0382 0.232 -0.164 8.70e- 1 Cement
#> 3 arima sma1 -0.823 0.0426 -19.3 1.09e-48 Cement
#> # A tibble: 5 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 0.245 0.137 1.80 7.39e- 2 Electricity
#> 2 arima ma1 -0.739 0.107 -6.91 5.68e-11 Electricity
#> 3 arima sar1 0.893 0.0851 10.5 5.09e-21 Electricity
#> 4 arima sma1 -1.73 0.0941 -18.4 6.94e-46 Electricity
#> 5 arima sma2 0.791 0.0752 10.5 4.33e-21 Electricity
#> # A tibble: 3 × 7
#> .model term estimate std.error statistic p.value var
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 arima ar1 0.700 0.0779 8.99 1.34e-16 Gas
#> 2 arima ma1 -0.936 0.0444 -21.1 4.67e-54 Gas
#> 3 arima sma1 -0.518 0.0586 -8.84 3.64e-16 Gas
由 reprex package (v2.0.1)
于 2022-05-30 创建